2 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * syntax parsing engine
31 #ifndef GRAMMAR_PORT_BUILD
32 #error Do not build this file directly, build your grammar_XXX.c instead, which includes this file
36 Last Modified: 2004-II-8
43 The task is to check the syntax of an input string. Input string is a stream of ASCII
44 characters terminated with a null-character ('\0'). Checking it using C language is
45 difficult and hard to implement without bugs. It is hard to maintain and make changes when
48 This is because of a high redundancy of the C code. Large blocks of code are duplicated with
49 only small changes. Even use of macros does not solve the problem because macros cannot
50 erase the complexity of the problem.
52 The resolution is to create a new language that will be highly oriented to our task. Once
53 we describe a particular syntax, we are done. We can then focus on the code that implements
54 the language. The size and complexity of it is relatively smaller than the code that directly
57 First, we must implement our new language. Here, the language is implemented in C, but it
58 could also be implemented in any other language. The code is listed below. We must take
59 good care that it is bug free. This is simple because the code is simple and clean.
61 Next, we must describe the syntax of our new language in itself. Once created and checked
62 manually that it is correct, we can use it to check other scripts.
64 Note that our new language loading code does not have to check the syntax. It is because we
65 assume that the script describing itself is correct, and other scripts can be syntactically
66 checked by the former script. The loading code must only do semantic checking which leads us to
67 simple resolving references.
72 Here I will describe the syntax of the new language (further called "Synek"). It is mainly a
73 sequence of declarations terminated by a semicolon. The declaration consists of a symbol,
74 which is an identifier, and its definition. A definition is in turn a sequence of specifiers
75 connected with ".and" or ".or" operator. These operators cannot be mixed together in one
76 definition. Specifier can be a symbol, string, character, character range or a special
77 keyword ".true" or ".false".
79 At the very beginning of the script there is a declaration of a root symbol, which is in the form:
80 .syntax <root_symbol>;
81 The <root_symbol> must be one of the symbols in the declaration sequence. The syntax is correct if
82 the root symbol evaluates to true. A symbol evaluates to true if the definition associated with
83 the symbol evaluates to true. Definition evaluation depends on the operator used to connect
84 specifiers in the definition. If ".and" operator is used, definition evaluates to true if and
85 only if all the specifiers evaluate to true. If ".or" operator is used, definition evaluates to
86 true if any of the specifiers evaluates to true. If definition contains only one specifier,
87 it is evaluated as if it was connected with ".true" keyword by ".and" operator.
89 If specifier is a ".true" keyword, it always evaluates to true.
91 If specifier is a ".false" keyword, it always evaluates to false. Specifier evaluates to false
92 when it does not evaluate to true.
94 Character range specifier is in the form:
95 '<first_character>' - '<second_character>'
96 If specifier is a character range, it evaluates to true if character in the stream is greater
97 or equal to <first_character> and less or equal to <second_character>. In that situation
98 the stream pointer is advanced to point to next character in the stream. All C-style escape
99 sequences are supported although trigraph sequences are not. The comparisons are performed
100 on 8-bit unsigned integers.
102 Character specifier is in the form:
104 It evaluates to true if the following character range specifier evaluates to true:
105 '<single_character>' - '<single_character>'
107 String specifier is in the form:
109 Let N be the number of characters in <string>. Let <string>[i] designate i-th character in
110 <string>. Then the string specifier evaluates to true if and only if for i in the range [0, N)
111 the following character specifier evaluates to true:
113 If <string>[i] is a quotation mark, '<string>[i]' is replaced with '\<string>[i]'.
115 Symbol specifier can be optionally preceded by a ".loop" keyword in the form:
117 where <symbol> is defined as follows:
118 <symbol> <definition>; (2)
119 Construction (1) is replaced by the following code:
121 and declaration (2) is replaced by the following:
122 <symbol$1> <symbol$2> .or .true;
123 <symbol$2> <symbol> .and <symbol$1>;
124 <symbol> <definition>;
126 Synek also supports a register mechanism. The user can, in their SYN file, declare a number of
127 registers that can be accessed in the syn body. Each reg has its name and a default value.
128 The register is one byte wide. The C code can change the default value by calling
129 grammar_set_reg8() with grammar id, register name and a new value. As we know, each rule is
130 a sequence of specifiers joined with .and or .or operator. And now each specifier can be
131 prefixed with a condition expression in a form ".if (<reg_name> <operator> <hex_literal>)"
132 where <operator> can be == or !=. If the condition evaluates to false, the specifier
133 evaluates to .false. Otherwise it evaluates to the specifier.
138 Synek supports all escape sequences in character specifiers. The mapping table is listed below.
139 All occurrences of the characters in the first column are replaced with the corresponding
140 character in the second column.
142 Escape sequence Represents
143 ------------------------------------------------------------------------------------------------
151 \' Single quotation mark
152 \" Double quotation mark
154 \? Literal question mark
155 \ooo ASCII character in octal notation
156 \xhhh ASCII character in hexadecimal notation
157 ------------------------------------------------------------------------------------------------
162 Any specifier can be followed by a special construction that is executed when the specifier
163 evaluates to false. The construction is in the form:
165 <ERROR_TEXT> is an identifier declared earlier by error text declaration. The declaration is
167 .errtext <ERROR_TEXT> "<error_desc>"
168 When specifier evaluates to false and this construction is present, parsing is stopped
169 immediately and <error_desc> is returned as a result of parsing. The error position is also
170 returned and it is meant as an offset from the beginning of the stream to the character that
171 was valid so far. Example:
173 (**** syntax script ****)
176 .errtext MISSING_SEMICOLON "missing ';'"
177 program declaration .and .loop space .and ';' .error MISSING_SEMICOLON .and
178 .loop space .and '\0';
179 declaration "declare" .and .loop space .and identifier;
182 (**** sample code ****)
186 In the example above checking the sample code will result in error message "missing ';'" and
187 error position 12. The sample code is not correct. Note the presence of '\0' specifier to
188 assure that there is no code after semicolon - only spaces.
189 <error_desc> can optionally contain identifier surrounded by dollar signs $. In such a case,
190 the identifier and dollar signs are replaced by a string retrieved by invoking symbol with
191 the identifier name. The starting position is the error position. The length of the resulting
192 string is the position after invoking the symbol.
197 Synek not only checks the syntax but it can also produce (emit) bytes associated with specifiers
198 that evaluate to true. That is, every specifier and optional error construction can be followed
199 by a number of emit constructions that are in the form:
201 <parameter> can be a HEX number, identifier, a star * or a dollar $. HEX number is preceded by
202 0x or 0X. If <parameter> is an identifier, it must be earlier declared by emit code declaration
204 .emtcode <identifier> <hex_number>
206 When given specifier evaluates to true, all emits associated with the specifier are output
207 in order they were declared. A star means that last-read character should be output instead
208 of constant value. Example:
210 (**** syntax script ****)
213 .emtcode WORD_FOO 0x01
214 .emtcode WORD_BAR 0x02
215 foobar FOO .emit WORD_FOO .or BAR .emit WORD_BAR .or .true .emit 0x00;
216 FOO "foo" .and SPACE;
217 BAR "bar" .and SPACE;
220 (**** sample text 1 ****)
224 (**** sample text 2 ****)
228 For both samples the result will be one-element array. For first sample text it will be
229 value 1, for second - 0. Note that every text will be accepted because of presence of
230 .true as an alternative.
234 (**** syntax script ****)
237 .emtcode VARIABLE 0x01
238 declaration "declare" .and .loop space .and
239 identifier .emit VARIABLE .and (1)
240 .true .emit 0x00 .and (2)
241 .loop space .and ';';
243 identifier .loop id_char .emit *; (3)
244 id_char 'a'-'z' .or 'A'-'Z' .or '_';
246 (**** sample code ****)
250 In specifier (1) symbol <identifier> is followed by .emit VARIABLE. If it evaluates to
251 true, VARIABLE constant and then production of the symbol is output. Specifier (2) is used
252 to terminate the string with null to signal when the string ends. Specifier (3) outputs
253 all characters that make declared identifier. The result of sample code will be the
255 { 1, 'f', 'u', 'b', 'a', 'r', 0 }
257 If .emit is followed by dollar $, it means that current position should be output. Current
258 position is a 32-bit unsigned integer distance from the very beginning of the parsed string to
259 first character consumed by the specifier associated with the .emit instruction. Current
260 position is stored in the output buffer in Little-Endian convention (the lowest byte comes
264 static void mem_free (void **);
267 internal error messages
269 static const byte
*OUT_OF_MEMORY
= (byte
*) "internal error 1001: out of physical memory";
270 static const byte
*UNRESOLVED_REFERENCE
= (byte
*) "internal error 1002: unresolved reference '$'";
271 static const byte
*INVALID_GRAMMAR_ID
= (byte
*) "internal error 1003: invalid grammar object";
272 static const byte
*INVALID_REGISTER_NAME
= (byte
*) "internal error 1004: invalid register name: '$'";
274 static const byte
*error_message
= NULL
;
275 static byte
*error_param
= NULL
; /* this is inserted into error_message in place of $ */
276 static int error_position
= -1;
278 static byte
*unknown
= (byte
*) "???";
280 static void clear_last_error ()
282 /* reset error message */
283 error_message
= NULL
;
285 /* free error parameter - if error_param is a "???" don't free it - it's static */
286 if (error_param
!= unknown
)
287 mem_free ((void **) &error_param
);
291 /* reset error position */
295 static void set_last_error (const byte
*msg
, byte
*param
, int pos
)
297 /* error message can be set only once */
298 if (error_message
!= NULL
)
309 error_param
= unknown
;
311 error_position
= pos
;
315 memory management routines
317 static void *mem_alloc (size_t size
)
319 void *ptr
= grammar_alloc_malloc (size
);
321 set_last_error (OUT_OF_MEMORY
, NULL
, -1);
/*
    copies size bytes from src to dst by delegating to grammar_memory_copy(),
    hands back whatever grammar_memory_copy() returns
*/
static void *mem_copy (void *dst, const void *src, size_t size)
{
    void *result = grammar_memory_copy (dst, src, size);

    return result;
}
330 static void mem_free (void **ptr
)
332 grammar_alloc_free (*ptr
);
336 static void *mem_realloc (void *ptr
, size_t old_size
, size_t new_size
)
338 void *ptr2
= grammar_alloc_realloc (ptr
, old_size
, new_size
);
340 set_last_error (OUT_OF_MEMORY
, NULL
, -1);
344 static byte
*str_copy_n (byte
*dst
, const byte
*src
, size_t max_len
)
346 return grammar_string_copy_n (dst
, src
, max_len
);
349 static byte
*str_duplicate (const byte
*str
)
351 byte
*new_str
= grammar_string_duplicate (str
);
353 set_last_error (OUT_OF_MEMORY
, NULL
, -1);
357 static int str_equal (const byte
*str1
, const byte
*str2
)
359 return grammar_string_compare (str1
, str2
) == 0;
362 static int str_equal_n (const byte
*str1
, const byte
*str2
, unsigned int n
)
364 return grammar_string_compare_n (str1
, str2
, n
) == 0;
367 static unsigned int str_length (const byte
*str
)
369 return grammar_string_length (str
);
373 string to byte map typedef
375 typedef struct map_byte_
379 struct map_byte_
*next
;
382 static void map_byte_create (map_byte
**ma
)
384 *ma
= mem_alloc (sizeof (map_byte
));
393 /* XXX unfold the recursion */
394 static void map_byte_destroy (map_byte
**ma
)
398 map_byte_destroy (&(**ma
).next
);
399 mem_free ((void **) &(**ma
).key
);
400 mem_free ((void **) ma
);
404 static void map_byte_append (map_byte
**ma
, map_byte
**nm
)
412 searches the map for the specified key,
413 returns pointer to the element with the specified key if it exists
414 returns NULL otherwise
416 map_byte
*map_byte_locate (map_byte
**ma
, const byte
*key
)
420 if (str_equal ((**ma
).key
, key
))
426 set_last_error (UNRESOLVED_REFERENCE
, str_duplicate (key
), -1);
431 searches the map for specified key,
432 if the key is matched, *data is filled with data associated with the key,
433 returns 0 if the key is matched,
436 static int map_byte_find (map_byte
**ma
, const byte
*key
, byte
*data
)
438 map_byte
*found
= map_byte_locate (ma
, key
);
450 regbyte context typedef
452 Each regbyte consists of its name and a default value. These are static and created at
453 grammar script compile-time, for example the following line:
454 .regbyte vertex_blend 0x00
455 adds a new regbyte named "vertex_blend" to the static list and initializes it to 0.
456 When the script is executed, this regbyte can be accessed by name for read and write. When a
457 particular regbyte is written, a new regbyte_ctx entry is added to the top of the regbyte_ctx
458 stack. The new entry contains information about which regbyte it references and its new value.
459 When a given regbyte is accessed for read, the stack is searched top-down to find an
460 entry that references the regbyte. The first matching entry is used to return the current
461 value it holds. If no entry is found, the default value is returned.
463 typedef struct regbyte_ctx_
466 byte m_current_value
;
467 struct regbyte_ctx_
*m_prev
;
470 static void regbyte_ctx_create (regbyte_ctx
**re
)
472 *re
= mem_alloc (sizeof (regbyte_ctx
));
475 (**re
).m_regbyte
= NULL
;
476 (**re
).m_prev
= NULL
;
480 static void regbyte_ctx_destroy (regbyte_ctx
**re
)
484 mem_free ((void **) re
);
488 static byte
regbyte_ctx_extract (regbyte_ctx
**re
, map_byte
*reg
)
490 /* first lookup in the register stack */
493 if ((**re
).m_regbyte
== reg
)
494 return (**re
).m_current_value
;
499 /* if not found - return the default value */
506 typedef enum emit_type_
508 et_byte
, /* explicit number */
509 et_stream
, /* eaten character */
510 et_position
/* current position */
514 emit destination typedef
516 typedef enum emit_dest_
518 ed_output
, /* write to the output buffer */
519 ed_regbyte
/* write a particular regbyte */
527 emit_dest m_emit_dest
;
528 emit_type m_emit_type
; /* ed_output */
529 byte m_byte
; /* et_byte */
530 map_byte
*m_regbyte
; /* ed_regbyte */
531 byte
*m_regname
; /* ed_regbyte - temporary */
532 struct emit_
*m_next
;
535 static void emit_create (emit
**em
)
537 *em
= mem_alloc (sizeof (emit
));
540 (**em
).m_emit_dest
= ed_output
;
541 (**em
).m_emit_type
= et_byte
;
542 (**em
).m_byte
= '\0';
543 (**em
).m_regbyte
= NULL
;
544 (**em
).m_regname
= NULL
;
545 (**em
).m_next
= NULL
;
549 static void emit_destroy (emit
**em
)
553 emit_destroy (&(**em
).m_next
);
554 mem_free ((void **) &(**em
).m_regname
);
555 mem_free ((void **) em
);
562 typedef struct error_
566 struct rule_
*m_token
;
569 static void error_create (error
**er
)
571 *er
= mem_alloc (sizeof (error
));
574 (**er
).m_text
= NULL
;
575 (**er
).m_token_name
= NULL
;
576 (**er
).m_token
= NULL
;
580 static void error_destroy (error
**er
)
584 mem_free ((void **) &(**er
).m_text
);
585 mem_free ((void **) &(**er
).m_token_name
);
586 mem_free ((void **) er
);
591 static byte
*error_get_token (error
*, struct dict_
*, const byte
*, unsigned int);
594 condition operand type typedef
596 typedef enum cond_oper_type_
598 cot_byte
, /* constant 8-bit unsigned integer */
599 cot_regbyte
/* pointer to byte register containing the current value */
603 condition operand typedef
605 typedef struct cond_oper_
607 cond_oper_type m_type
;
608 byte m_byte
; /* cot_byte */
609 map_byte
*m_regbyte
; /* cot_regbyte */
610 byte
*m_regname
; /* cot_regbyte - temporary */
614 condition type typedef
616 typedef enum cond_type_
628 cond_oper m_operands
[2];
631 static void cond_create (cond
**co
)
633 *co
= mem_alloc (sizeof (cond
));
636 (**co
).m_operands
[0].m_regname
= NULL
;
637 (**co
).m_operands
[1].m_regname
= NULL
;
641 static void cond_destroy (cond
**co
)
645 mem_free ((void **) &(**co
).m_operands
[0].m_regname
);
646 mem_free ((void **) &(**co
).m_operands
[1].m_regname
);
647 mem_free ((void **) co
);
652 specifier type typedef
654 typedef enum spec_type_
671 spec_type m_spec_type
;
672 byte m_byte
[2]; /* st_byte, st_byte_range */
673 byte
*m_string
; /* st_string */
674 struct rule_
*m_rule
; /* st_identifier, st_identifier_loop */
678 struct spec_
*m_next
;
681 static void spec_create (spec
**sp
)
683 *sp
= mem_alloc (sizeof (spec
));
686 (**sp
).m_spec_type
= st_false
;
687 (**sp
).m_byte
[0] = '\0';
688 (**sp
).m_byte
[1] = '\0';
689 (**sp
).m_string
= NULL
;
690 (**sp
).m_rule
= NULL
;
691 (**sp
).m_emits
= NULL
;
692 (**sp
).m_errtext
= NULL
;
693 (**sp
).m_cond
= NULL
;
694 (**sp
).m_next
= NULL
;
698 static void spec_destroy (spec
**sp
)
702 spec_destroy (&(**sp
).m_next
);
703 emit_destroy (&(**sp
).m_emits
);
704 error_destroy (&(**sp
).m_errtext
);
705 mem_free ((void **) &(**sp
).m_string
);
706 cond_destroy (&(**sp
).m_cond
);
707 mem_free ((void **) sp
);
711 static void spec_append (spec
**sp
, spec
**ns
)
735 struct rule_
*m_next
;
736 /* int m_referenced; */ /* for debugging purposes */
739 static void rule_create (rule
**ru
)
741 *ru
= mem_alloc (sizeof (rule
));
744 (**ru
).m_oper
= op_none
;
745 (**ru
).m_specs
= NULL
;
746 (**ru
).m_next
= NULL
;
747 /* (**ru).m_referenced = 0; */
751 static void rule_destroy (rule
**ru
)
755 rule_destroy (&(**ru
).m_next
);
756 spec_destroy (&(**ru
).m_specs
);
757 mem_free ((void **) ru
);
761 static void rule_append (rule
**ru
, rule
**nr
)
769 returns unique grammar id
771 static grammar
next_valid_grammar_id ()
773 static grammar id
= 0;
786 map_byte
*m_regbytes
;
788 struct dict_
*m_next
;
791 static void dict_create (dict
**di
)
793 *di
= mem_alloc (sizeof (dict
));
796 (**di
).m_rulez
= NULL
;
797 (**di
).m_syntax
= NULL
;
798 (**di
).m_string
= NULL
;
799 (**di
).m_regbytes
= NULL
;
800 (**di
).m_id
= next_valid_grammar_id ();
801 (**di
).m_next
= NULL
;
805 static void dict_destroy (dict
**di
)
809 rule_destroy (&(**di
).m_rulez
);
810 map_byte_destroy (&(**di
).m_regbytes
);
811 mem_free ((void **) di
);
815 static void dict_append (dict
**di
, dict
**nd
)
822 static void dict_find (dict
**di
, grammar key
, dict
**data
)
826 if ((**di
).m_id
== key
)
838 static dict
*g_dicts
= NULL
;
843 XXX this class is going to be replaced by a faster one, soon
845 typedef struct barray_
851 static void barray_create (barray
**ba
)
853 *ba
= mem_alloc (sizeof (barray
));
861 static void barray_destroy (barray
**ba
)
865 mem_free ((void **) &(**ba
).data
);
866 mem_free ((void **) ba
);
871 reallocates byte array to requested size,
872 returns 0 on success,
875 static int barray_resize (barray
**ba
, unsigned int nlen
)
881 mem_free ((void **) &(**ba
).data
);
889 new_pointer
= mem_realloc ((**ba
).data
, (**ba
).len
* sizeof (byte
), nlen
* sizeof (byte
));
892 (**ba
).data
= new_pointer
;
903 adds byte array pointed by *nb to the end of array pointed by *ba,
904 returns 0 on success,
907 static int barray_append (barray
**ba
, barray
**nb
)
909 const unsigned int len
= (**ba
).len
;
911 if (barray_resize (ba
, (**ba
).len
+ (**nb
).len
))
914 mem_copy ((**ba
).data
+ len
, (**nb
).data
, (**nb
).len
);
920 adds emit chain pointed by em to the end of array pointed by *ba,
921 returns 0 on success,
924 static int barray_push (barray
**ba
, emit
*em
, byte c
, unsigned int pos
, regbyte_ctx
**rbc
)
927 unsigned int count
= 0;
931 if (temp
->m_emit_dest
== ed_output
)
932 if (temp
->m_emit_type
== et_position
)
933 count
+= 4; /* position is a 32-bit unsigned integer */
940 if (barray_resize (ba
, (**ba
).len
+ count
))
945 if (em
->m_emit_dest
== ed_output
)
947 if (em
->m_emit_type
== et_byte
)
948 (**ba
).data
[(**ba
).len
- count
--] = em
->m_byte
;
949 else if (em
->m_emit_type
== et_stream
)
950 (**ba
).data
[(**ba
).len
- count
--] = c
;
951 else // em->type == et_position
952 (**ba
).data
[(**ba
).len
- count
--] = (byte
) pos
,
953 (**ba
).data
[(**ba
).len
- count
--] = (byte
) (pos
>> 8),
954 (**ba
).data
[(**ba
).len
- count
--] = (byte
) (pos
>> 16),
955 (**ba
).data
[(**ba
).len
- count
--] = (byte
) (pos
>> 24);
959 regbyte_ctx
*new_rbc
;
960 regbyte_ctx_create (&new_rbc
);
964 new_rbc
->m_prev
= *rbc
;
965 new_rbc
->m_regbyte
= em
->m_regbyte
;
968 if (em
->m_emit_type
== et_byte
)
969 new_rbc
->m_current_value
= em
->m_byte
;
970 else if (em
->m_emit_type
== et_stream
)
971 new_rbc
->m_current_value
= c
;
981 string to string map typedef
983 typedef struct map_str_
987 struct map_str_
*next
;
990 static void map_str_create (map_str
**ma
)
992 *ma
= mem_alloc (sizeof (map_str
));
1001 static void map_str_destroy (map_str
**ma
)
1005 map_str_destroy (&(**ma
).next
);
1006 mem_free ((void **) &(**ma
).key
);
1007 mem_free ((void **) &(**ma
).data
);
1008 mem_free ((void **) ma
);
1012 static void map_str_append (map_str
**ma
, map_str
**nm
)
1020 searches the map for specified key,
1021 if the key is matched, *data is filled with data associated with the key,
1022 returns 0 if the key is matched,
1025 static int map_str_find (map_str
**ma
, const byte
*key
, byte
**data
)
1029 if (str_equal ((**ma
).key
, key
))
1031 *data
= str_duplicate ((**ma
).data
);
1041 set_last_error (UNRESOLVED_REFERENCE
, str_duplicate (key
), -1);
1046 string to rule map typedef
1048 typedef struct map_rule_
1052 struct map_rule_
*next
;
1055 static void map_rule_create (map_rule
**ma
)
1057 *ma
= mem_alloc (sizeof (map_rule
));
1066 static void map_rule_destroy (map_rule
**ma
)
1070 map_rule_destroy (&(**ma
).next
);
1071 mem_free ((void **) &(**ma
).key
);
1072 mem_free ((void **) ma
);
1076 static void map_rule_append (map_rule
**ma
, map_rule
**nm
)
1084 searches the map for specified key,
1085 if the key is matched, *data is filled with data associated with the key,
1086 returns 0 if the key is matched,
1089 static int map_rule_find (map_rule
**ma
, const byte
*key
, rule
**data
)
1093 if (str_equal ((**ma
).key
, key
))
1095 *data
= (**ma
).data
;
1103 set_last_error (UNRESOLVED_REFERENCE
, str_duplicate (key
), -1);
1108 returns 1 if given character is a white space,
1111 static int is_space (byte c
)
1113 return c
== ' ' || c
== '\t' || c
== '\n' || c
== '\r';
1117 advances text pointer by 1 if character pointed by *text is a space,
1118 returns 1 if a space has been eaten,
1121 static int eat_space (const byte
**text
)
1123 if (is_space (**text
))
1134 returns 1 if text points to C-style comment start string "/*",
1137 static int is_comment_start (const byte
*text
)
1139 return text
[0] == '/' && text
[1] == '*';
1143 advances text pointer to first character after C-style comment block - if any,
1144 returns 1 if C-style comment block has been encountered and eaten,
1147 static int eat_comment (const byte
**text
)
1149 if (is_comment_start (*text
))
1151 /* *text points to comment block - skip two characters to enter comment body */
1153 /* skip any character except consecutive '*' and '/' */
1154 while (!((*text
)[0] == '*' && (*text
)[1] == '/'))
1156 /* skip those two terminating characters */
1166 advances text pointer to first character that is neither space nor C-style comment block
1168 static void eat_spaces (const byte
**text
)
1170 while (eat_space (text
) || eat_comment (text
))
1175 resizes string pointed by *ptr to successfully add character c to the end of the string,
1176 returns 0 on success,
1179 static int string_grow (byte
**ptr
, unsigned int *len
, byte c
)
1181 /* reallocate the string in 16-byte increments */
1182 if ((*len
& 0x0F) == 0x0F || *ptr
== NULL
)
1184 byte
*tmp
= mem_realloc (*ptr
, ((*len
+ 1) & ~0x0F) * sizeof (byte
),
1185 ((*len
+ 1 + 0x10) & ~0x0F) * sizeof (byte
));
1194 /* append given character */
1198 (*ptr
)[*len
] = '\0';
1204 returns 1 if given character is a valid identifier character a-z, A-Z, 0-9 or _
1207 static int is_identifier (byte c
)
1209 return (c
>= 'a' && c
<= 'z') || (c
>= 'A' && c
<= 'Z') || (c
>= '0' && c
<= '9') || c
== '_';
1213 copies characters from *text to *id until non-identifier character is encountered,
1214 assumes that *id points to NULL object - caller is responsible for later freeing the string,
1215 text pointer is advanced to point past the copied identifier,
1216 returns 0 if identifier was successfully copied,
1219 static int get_identifier (const byte
**text
, byte
**id
)
1221 const byte
*t
= *text
;
1223 unsigned int len
= 0;
1225 if (string_grow (&p
, &len
, '\0'))
1228 /* loop while next character in buffer is valid for identifiers */
1229 while (is_identifier (*t
))
1231 if (string_grow (&p
, &len
, *t
++))
1233 mem_free ((void **) &p
);
1245 returns 1 if given character is HEX digit 0-9, A-F or a-f,
1248 static int is_hex (byte c
)
1250 return (c
>= '0' && c
<= '9') || (c
>= 'A' && c
<= 'F') || (c
>= 'a' && c
<= 'f');
1254 returns value of passed character as if it was HEX digit
1256 static unsigned int hex2dec (byte c
)
1258 if (c
>= '0' && c
<= '9')
1260 if (c
>= 'A' && c
<= 'F')
1261 return c
- 'A' + 10;
1262 return c
- 'a' + 10;
1266 converts sequence of HEX digits pointed by *text until non-HEX digit is encountered,
1267 advances text pointer past the converted sequence,
1268 returns the converted value
1270 static unsigned int hex_convert (const byte
**text
)
1272 unsigned int value
= 0;
1274 while (is_hex (**text
))
1276 value
= value
* 0x10 + hex2dec (**text
);
1284 returns 1 if given character is OCT digit 0-7,
1287 static int is_oct (byte c
)
1289 return c
>= '0' && c
<= '7';
1293 returns value of passed character as if it was OCT digit
1295 static int oct2dec (byte c
)
1300 static byte
get_escape_sequence (const byte
**text
)
1304 /* skip '\' character */
1332 return (byte
) hex_convert (text
);
1336 if (is_oct (**text
))
1338 value
= oct2dec (*(*text
)++);
1339 if (is_oct (**text
))
1341 value
= value
* 010 + oct2dec (*(*text
)++);
1342 if (is_oct (**text
))
1343 value
= value
* 010 + oct2dec (*(*text
)++);
1347 return (byte
) value
;
1351 copies characters from *text to *str until " or ' character is encountered,
1352 assumes that *str points to NULL object - caller is responsible for later freeing the string,
1353 assumes that *text points to " or ' character that starts the string,
1354 text pointer is advanced to point past the " or ' character,
1355 returns 0 if string was successfully copied,
1358 static int get_string (const byte
**text
, byte
**str
)
1360 const byte
*t
= *text
;
1362 unsigned int len
= 0;
1365 if (string_grow (&p
, &len
, '\0'))
1368 /* read " or ' character that starts the string */
1370 /* while next character is not the terminating character */
1371 while (*t
&& *t
!= term_char
)
1376 c
= get_escape_sequence (&t
);
1380 if (string_grow (&p
, &len
, c
))
1382 mem_free ((void **) &p
);
1386 /* skip " or ' character that ends the string */
1395 gets emit code, the syntax is: ".emtcode" " " <symbol> " " ("0x" | "0X") <hex_value>
1396 assumes that *text already points to <symbol>,
1397 returns 0 if emit code is successfully read,
1400 static int get_emtcode (const byte
**text
, map_byte
**ma
)
1402 const byte
*t
= *text
;
1405 map_byte_create (&m
);
1409 if (get_identifier (&t
, &m
->key
))
1411 map_byte_destroy (&m
);
1420 if (get_string (&t
, &c
))
1422 map_byte_destroy (&m
);
1426 m
->data
= (byte
) c
[0];
1427 mem_free ((void **) &c
);
1431 /* skip HEX "0x" or "0X" prefix */
1433 m
->data
= (byte
) hex_convert (&t
);
1444 gets regbyte declaration, the syntax is: ".regbyte" " " <symbol> " " ("0x" | "0X") <hex_value>
1445 assumes that *text already points to <symbol>,
1446 returns 0 if regbyte is successfully read,
1449 static int get_regbyte (const byte
**text
, map_byte
**ma
)
1451 return get_emtcode (text
, ma
);
1455 returns 0 on success,
1458 static int get_errtext (const byte
**text
, map_str
**ma
)
1460 const byte
*t
= *text
;
1463 map_str_create (&m
);
1467 if (get_identifier (&t
, &m
->key
))
1469 map_str_destroy (&m
);
1474 if (get_string (&t
, &m
->data
))
1476 map_str_destroy (&m
);
1487 returns 0 on success,
1488 returns 1 otherwise,
1490 static int get_error (const byte
**text
, error
**er
, map_str
*maps
)
1492 const byte
*t
= *text
;
1499 if (get_identifier (&t
, &temp
))
1503 if (!str_equal ((byte
*) "error", temp
))
1505 mem_free ((void **) &temp
);
1509 mem_free ((void **) &temp
);
1517 if (get_string (&t
, &(**er
).m_text
))
1526 if (get_identifier (&t
, &temp
))
1533 if (map_str_find (&maps
, temp
, &(**er
).m_text
))
1535 mem_free ((void **) &temp
);
1540 mem_free ((void **) &temp
);
1543 /* try to extract "token" from "...$token$..." */
1545 byte
*processed
= NULL
;
1546 unsigned int len
= 0, i
= 0;
1548 if (string_grow (&processed
, &len
, '\0'))
1554 while (i
< str_length ((**er
).m_text
))
1556 /* check if the dollar sign is repeated - if so skip it */
1557 if ((**er
).m_text
[i
] == '$' && (**er
).m_text
[i
+ 1] == '$')
1559 if (string_grow (&processed
, &len
, '$'))
1561 mem_free ((void **) &processed
);
1568 else if ((**er
).m_text
[i
] != '$')
1570 if (string_grow (&processed
, &len
, (**er
).m_text
[i
]))
1572 mem_free ((void **) &processed
);
1581 if (string_grow (&processed
, &len
, '$'))
1583 mem_free ((void **) &processed
);
1589 /* length of token being extracted */
1590 unsigned int tlen
= 0;
1592 if (string_grow (&(**er
).m_token_name
, &tlen
, '\0'))
1594 mem_free ((void **) &processed
);
1599 /* skip the dollar sign */
1602 while ((**er
).m_text
[i
] != '$')
1604 if (string_grow (&(**er
).m_token_name
, &tlen
, (**er
).m_text
[i
]))
1606 mem_free ((void **) &processed
);
1614 /* skip the dollar sign */
1620 mem_free ((void **) &(**er
).m_text
);
1621 (**er
).m_text
= processed
;
1629 returns 0 on success,
1630 returns 1 otherwise,
/*
 * get_emits - parse one "emit" or "load" clause and, recursively, the ones
 * that follow it.
 *
 * NOTE(review): declarations, braces and several branches are missing from
 * this listing; the description is limited to the visible lines.
 *
 * The keyword read from the text is compared with "emit" and "load".  The
 * destination is stored in e->m_emit_dest; when it is ed_regbyte the register
 * name is read into e->m_regname.  The visible value forms are: a hex number
 * (hex_convert), a quoted character (get_string, first byte used) and an
 * identifier resolved through mapb (map_byte_find), all stored as et_byte;
 * et_stream and et_position are assigned on lines whose conditions are not
 * shown.  The function then calls itself to fill e->m_next.
 */
1632 static int get_emits (const byte
**text
, emit
**em
, map_byte
*mapb
)
1634 const byte
*t
= *text
;
1643 if (get_identifier (&t
, &temp
))
1648 if (str_equal ((byte
*) "emit", temp
))
1651 else if (str_equal ((byte
*) "load", temp
))
1655 mem_free ((void **) &temp
);
1659 mem_free ((void **) &temp
);
1665 e
->m_emit_dest
= dest
;
/* a register destination is followed by the register name */
1667 if (dest
== ed_regbyte
)
1669 if (get_identifier (&t
, &e
->m_regname
))
/* value given as a hex number */
1681 e
->m_byte
= (byte
) hex_convert (&t
);
1683 e
->m_emit_type
= et_byte
;
1690 e
->m_emit_type
= et_stream
;
1697 e
->m_emit_type
= et_position
;
/* value given as a quoted character: only its first byte is used */
1700 else if (*t
== '\'')
1702 if (get_string (&t
, &temp
))
1707 e
->m_byte
= (byte
) temp
[0];
1709 mem_free ((void **) &temp
);
1711 e
->m_emit_type
= et_byte
;
/* value given as an identifier resolved through mapb */
1715 if (get_identifier (&t
, &temp
))
1721 if (map_byte_find (&mapb
, temp
, &e
->m_byte
))
1723 mem_free ((void **) &temp
);
1728 mem_free ((void **) &temp
);
1730 e
->m_emit_type
= et_byte
;
/* parse the clauses that follow into the next list node */
1735 if (get_emits (&t
, &e
->m_next
, mapb
))
1747 returns 0 on success,
1748 returns 1 otherwise,
/*
 * get_spec - parse a single specifier of a rule into *sp.
 *
 * NOTE(review): braces, several conditions and the surrounding control flow
 * are missing from this listing; only the visible lines are described.
 *
 * Visible steps:
 *  - an optional "if" condition: left operand is a register name
 *    (cot_regbyte), the operator is != or == (ct_not_equal / ct_equal) and
 *    the right operand is a hex byte (cot_byte);
 *  - the specifier itself, which can be a byte or a byte range (st_byte,
 *    st_byte_range), a string (st_string), one of the keywords "true",
 *    "false", "debug", "loop" (st_true, st_false, st_debug,
 *    st_identifier_loop) or an identifier (st_identifier);
 *  - the error clause (get_error, using maps) and the emit clauses
 *    (get_emits, using mapb).
 */
1750 static int get_spec (const byte
**text
, spec
**sp
, map_str
*maps
, map_byte
*mapb
)
1752 const byte
*t
= *text
;
1759 /* first - read optional .if statement */
1763 byte
*keyword
= NULL
;
/* the keyword is read through a second cursor u, not through t */
1768 if (get_identifier (&u
, &keyword
))
1775 if (str_equal ((byte
*) "if", keyword
))
1777 cond_create (&s
->m_cond
);
1778 if (s
->m_cond
== NULL
)
1784 /* skip the left paren */
1788 /* get the left operand */
1790 if (get_identifier (&u
, &s
->m_cond
->m_operands
[0].m_regname
))
1795 s
->m_cond
->m_operands
[0].m_type
= cot_regbyte
;
1797 /* get the operator (!= or ==) */
1800 s
->m_cond
->m_type
= ct_not_equal
;
1802 s
->m_cond
->m_type
= ct_equal
;
1805 /* skip the 0x prefix */
1809 /* get the right operand */
1810 s
->m_cond
->m_operands
[1].m_byte
= hex_convert (&u
);
1811 s
->m_cond
->m_operands
[1].m_type
= cot_byte
;
1813 /* skip the right paren */
1822 mem_free ((void **) &keyword
);
/* byte or byte range: the first byte of each string read is used */
1829 if (get_string (&t
, &temp
))
1840 /* skip the '-' character */
1844 if (get_string (&t
, &temp2
))
1846 mem_free ((void **) &temp
);
1852 s
->m_spec_type
= st_byte_range
;
1853 s
->m_byte
[0] = *temp
;
1854 s
->m_byte
[1] = *temp2
;
1856 mem_free ((void **) &temp2
);
1860 s
->m_spec_type
= st_byte
;
1864 mem_free ((void **) &temp
);
/* string specifier: the text is kept in s->m_string */
1868 if (get_string (&t
, &s
->m_string
))
1875 s
->m_spec_type
= st_string
;
/* keyword specifiers */
1879 byte
*keyword
= NULL
;
1884 if (get_identifier (&t
, &keyword
))
1892 if (str_equal ((byte
*) "true", keyword
))
1894 s
->m_spec_type
= st_true
;
1897 else if (str_equal ((byte
*) "false", keyword
))
1899 s
->m_spec_type
= st_false
;
1902 else if (str_equal ((byte
*) "debug", keyword
))
1904 s
->m_spec_type
= st_debug
;
/* "loop" is followed by the name of the rule to repeat */
1907 else if (str_equal ((byte
*) "loop", keyword
))
1909 if (get_identifier (&t
, &s
->m_string
))
1911 mem_free ((void **) &keyword
);
1917 s
->m_spec_type
= st_identifier_loop
;
1920 mem_free ((void **) &keyword
);
/* plain identifier: the referenced name is kept in s->m_string */
1924 if (get_identifier (&t
, &s
->m_string
))
1931 s
->m_spec_type
= st_identifier
;
/* trailing error and emit clauses */
1934 if (get_error (&t
, &s
->m_errtext
, maps
))
1940 if (get_emits (&t
, &s
->m_emits
, mapb
))
1952 returns 0 on success,
1953 returns 1 otherwise,
/*
 * get_rule - parse a rule: a first specifier followed by further specifiers
 * joined with the "and" / "or" keywords, up to the closing semicolon.
 *
 * NOTE(review): the loop header, braces and the assignments to r->m_oper are
 * missing from this listing.
 *
 * The first specifier is read straight into r->m_specs; each later one is
 * read into sp and added with spec_append.  maps and mapb are passed through
 * to get_spec.
 */
1955 static int get_rule (const byte
**text
, rule
**ru
, map_str
*maps
, map_byte
*mapb
)
1957 const byte
*t
= *text
;
1964 if (get_spec (&t
, &r
->m_specs
, maps
, mapb
))
1975 /* skip the dot that precedes "and" or "or" */
1978 /* read "and" or "or" keyword */
1979 if (get_identifier (&t
, &op
))
/* the operator is only examined while the rule has none yet */
1986 if (r
->m_oper
== op_none
)
1989 if (str_equal ((byte
*) "and", op
))
1996 mem_free ((void **) &op
);
1998 if (get_spec (&t
, &sp
, maps
, mapb
))
2004 spec_append (&r
->m_specs
, &sp
);
2007 /* skip the semicolon */
2017 returns 0 on success,
2018 returns 1 otherwise,
/*
 * update_dependency - resolve a rule name to its rule.
 *
 * Looks symbol up in mapr with map_rule_find and stores the result through
 * ru.  The "referenced" bookkeeping below is commented out.
 * NOTE(review): the return statements are not part of this listing.
 */
2020 static int update_dependency (map_rule
*mapr
, byte
*symbol
, rule
**ru
)
2022 if (map_rule_find (&mapr
, symbol
, ru
))
2025 /* (**ru).m_referenced = 1; */
2031 returns 0 on success,
2032 returns 1 otherwise,
/*
 * update_dependencies - turn the symbolic names collected while parsing into
 * pointers.
 *
 * NOTE(review): loop headers, braces and returns are missing from this
 * listing; the description follows the visible lines only.
 *
 * Visible work:
 *  - resolves *syntax_symbol into di->m_syntax and, when *string_symbol is
 *    not NULL, *string_symbol into di->m_string, then frees both names;
 *  - for each rule reached through rulez and each of its specifiers:
 *      identifier / identifier-loop specifiers: m_string -> m_rule,
 *      error texts with a token name:          m_token_name -> m_token,
 *      condition operands of type cot_regbyte:  m_regname -> m_regbyte,
 *      emits with destination ed_regbyte:       m_regname -> m_regbyte;
 *    each name is freed after it has been used for the lookup.
 */
2034 static int update_dependencies (dict
*di
, map_rule
*mapr
, byte
**syntax_symbol
,
2035 byte
**string_symbol
, map_byte
*regbytes
)
2037 rule
*rulez
= di
->m_rulez
;
2039 /* update dependencies for the root and lexer symbols */
2040 if (update_dependency (mapr
, *syntax_symbol
, &di
->m_syntax
) ||
2041 (*string_symbol
!= NULL
&& update_dependency (mapr
, *string_symbol
, &di
->m_string
)))
2044 mem_free ((void **) syntax_symbol
);
2045 mem_free ((void **) string_symbol
);
2047 /* update dependencies for the rest of the rules */
2050 spec
*sp
= rulez
->m_specs
;
2052 /* iterate through all the specifiers */
2055 /* update dependency for identifier */
2056 if (sp
->m_spec_type
== st_identifier
|| sp
->m_spec_type
== st_identifier_loop
)
2058 if (update_dependency (mapr
, sp
->m_string
, &sp
->m_rule
))
2061 mem_free ((void **) &sp
->m_string
);
2064 /* some errtexts reference to a rule */
2065 if (sp
->m_errtext
&& sp
->m_errtext
->m_token_name
)
2067 if (update_dependency (mapr
, sp
->m_errtext
->m_token_name
, &sp
->m_errtext
->m_token
))
2070 mem_free ((void **) &sp
->m_errtext
->m_token_name
);
2073 /* update dependency for condition */
2077 for (i
= 0; i
< 2; i
++)
2078 if (sp
->m_cond
->m_operands
[i
].m_type
== cot_regbyte
)
/* NOTE(review): the first argument below looks like a mis-encoded
 * "&regbytes" (address of the regbytes parameter) - confirm against the
 * original source */
2080 sp
->m_cond
->m_operands
[i
].m_regbyte
= map_byte_locate (®bytes
,
2081 sp
->m_cond
->m_operands
[i
].m_regname
);
2083 if (sp
->m_cond
->m_operands
[i
].m_regbyte
== NULL
)
2086 mem_free ((void **) &sp
->m_cond
->m_operands
[i
].m_regname
);
2090 /* update dependency for all .load instructions */
2093 emit
*em
= sp
->m_emits
;
2096 if (em
->m_emit_dest
== ed_regbyte
)
/* NOTE(review): same suspected mis-encoding of "&regbytes" as above */
2098 em
->m_regbyte
= map_byte_locate (®bytes
, em
->m_regname
);
2100 if (em
->m_regbyte
== NULL
)
2103 mem_free ((void **) &em
->m_regname
);
/* move on to the next rule of the dictionary */
2113 rulez
= rulez
->m_next
;
2116 /* check for unreferenced symbols */
2117 /* de = di->m_defntns;
2120 if (!de->m_referenced)
/*
 * satisfies_condition - evaluate the condition co against the register
 * context ctx.
 *
 * Both operands are turned into values: either the operand's literal m_byte
 * or the value obtained with regbyte_ctx_extract for its m_regbyte.  The
 * result is the outcome of an == or != comparison of the two values.
 * NOTE(review): the case labels that select between these alternatives, and
 * any handling of a NULL co, are not part of this listing.
 */
2139 static int satisfies_condition (cond
*co
, regbyte_ctx
*ctx
)
2147 for (i
= 0; i
< 2; i
++)
2148 switch (co
->m_operands
[i
].m_type
)
/* operand is a literal byte */
2151 values
[i
] = co
->m_operands
[i
].m_byte
;
/* operand is a register: take its value from the context */
2154 values
[i
] = regbyte_ctx_extract (&ctx
, co
->m_operands
[i
].m_regbyte
);
2161 return values
[0] == values
[1];
2163 return values
[0] != values
[1];
/*
 * free_regbyte_ctx_stack - destroy register contexts starting at top until
 * limit is reached.
 *
 * The predecessor (top->m_prev) is saved in rbc before top is destroyed.
 * NOTE(review): the line that moves top on to rbc is not part of this
 * listing.
 */
2169 static void free_regbyte_ctx_stack (regbyte_ctx
*top
, regbyte_ctx
*limit
)
2171 while (top
!= limit
)
2173 regbyte_ctx
*rbc
= top
->m_prev
;
2174 regbyte_ctx_destroy (&top
);
/* result codes used by match() */
2179 typedef enum match_result_
2181 mr_not_matched
, /* the examined string does not match */
2182 mr_matched
, /* the examined string matches */
2183 mr_error_raised
, /* mr_not_matched + error has been raised */
2184 mr_dont_emit
, /* used by identifier loops only */
2185 mr_internal_error
/* an internal error has occurred such as out of memory */
2189 This function does the main job. It parses the text and generates output data.
2191 XXX optimize it - the barray seems to be the bottleneck
/*
 * match - try to match rule ru against text, starting at *index.
 *
 * NOTE(review): many lines of this function (most case labels, braces, index
 * updates and several returns) are absent from this listing; the comments
 * describe only what the visible lines show.
 *
 * Parameters, as used below:
 *   di               - dictionary; di->m_string, when set, is matched first
 *                      as a filter for string specifiers
 *   text, index      - input text and start position (copied into ind)
 *   ru               - rule being matched; its specifiers are visited via sp
 *   ba               - byte array receiving output through barray_push and
 *                      barray_append
 *   filtering_string - non-zero while matching on behalf of the string
 *                      filter; the filter step is skipped in that case
 *   rbc              - register context; *rbc is the limit used whenever the
 *                      local context stack is freed
 *
 * Returns a match_result.  Every visible mr_internal_error return is preceded
 * by freeing the local register context stack down to *rbc.
 */
2193 static match_result
match (dict
*di
, const byte
*text
, unsigned int *index
, rule
*ru
, barray
**ba
,
2194 int filtering_string
, regbyte_ctx
**rbc
)
2196 unsigned int ind
= *index
;
2197 match_result status
= mr_not_matched
;
2198 spec
*sp
= ru
->m_specs
;
2199 regbyte_ctx
*ctx
= *rbc
;
2201 /* for every specifier in the rule */
2204 unsigned int i
, len
, save_ind
= ind
;
2205 barray
*array
= NULL
;
/* a specifier is only tried when its condition holds for ctx */
2207 if (satisfies_condition (sp
->m_cond
, ctx
))
2209 switch (sp
->m_spec_type
)
/* referenced rule: match sp->m_rule into a scratch array */
2212 barray_create (&array
);
2215 free_regbyte_ctx_stack (ctx
, *rbc
);
2216 return mr_internal_error
;
2219 status
= match (di
, text
, &ind
, sp
->m_rule
, &array
, filtering_string
, &ctx
);
2220 if (status
== mr_internal_error
)
2222 free_regbyte_ctx_stack (ctx
, *rbc
);
2223 barray_destroy (&array
);
2224 return mr_internal_error
;
/* string specifier: len is the length of sp->m_string */
2228 len
= str_length (sp
->m_string
);
2230 /* prefilter the stream */
2231 if (!filtering_string
&& di
->m_string
)
2234 unsigned int filter_index
= 0;
2235 match_result result
;
2236 regbyte_ctx
*null_ctx
= NULL
;
2238 barray_create (&ba
);
2241 free_regbyte_ctx_stack (ctx
, *rbc
);
2242 return mr_internal_error
;
/* run the filter rule on the text at the current position */
2245 result
= match (di
, text
+ ind
, &filter_index
, di
->m_string
, &ba
, 1, &null_ctx
);
2247 if (result
== mr_internal_error
)
2249 free_regbyte_ctx_stack (ctx
, *rbc
);
2250 barray_destroy (&ba
);
2251 return mr_internal_error
;
2254 if (result
!= mr_matched
)
2256 barray_destroy (&ba
);
2257 status
= mr_not_matched
;
2261 barray_destroy (&ba
);
/* the filtered span must have exactly the expected length and content */
2263 if (filter_index
!= len
|| !str_equal_n (sp
->m_string
, text
+ ind
, len
))
2265 status
= mr_not_matched
;
2269 status
= mr_matched
;
/* unfiltered comparison, character by character, against sp->m_string */
2274 status
= mr_matched
;
2275 for (i
= 0; status
== mr_matched
&& i
< len
; i
++)
2276 if (text
[ind
+ i
] != sp
->m_string
[i
])
2277 status
= mr_not_matched
;
2278 if (status
== mr_matched
)
/* single byte comparison */
2283 status
= text
[ind
] == *sp
->m_byte
? mr_matched
: mr_not_matched
;
2284 if (status
== mr_matched
)
/* byte range comparison, inclusive on both ends */
2288 status
= (text
[ind
] >= sp
->m_byte
[0] && text
[ind
] <= sp
->m_byte
[1]) ?
2289 mr_matched
: mr_not_matched
;
2290 if (status
== mr_matched
)
2294 status
= mr_matched
;
2297 status
= mr_not_matched
;
/* result depends on the rule operator: matched for op_and, otherwise not */
2300 status
= ru
->m_oper
== op_and
? mr_matched
: mr_not_matched
;
/* identifier loop: each successful match of sp->m_rule pushes the emits,
 * appends the scratch array to ba and starts a fresh scratch array */
2302 case st_identifier_loop
:
2303 barray_create (&array
);
2306 free_regbyte_ctx_stack (ctx
, *rbc
);
2307 return mr_internal_error
;
2310 status
= mr_dont_emit
;
2313 match_result result
;
2316 result
= match (di
, text
, &ind
, sp
->m_rule
, &array
, filtering_string
, &ctx
);
2318 if (result
== mr_error_raised
)
2323 else if (result
== mr_matched
)
2325 if (barray_push (ba
, sp
->m_emits
, text
[ind
- 1], save_ind
, &ctx
) ||
2326 barray_append (ba
, &array
))
2328 free_regbyte_ctx_stack (ctx
, *rbc
);
2329 barray_destroy (&array
);
2330 return mr_internal_error
;
2332 barray_destroy (&array
);
2333 barray_create (&array
);
2336 free_regbyte_ctx_stack (ctx
, *rbc
);
2337 return mr_internal_error
;
2340 else if (result
== mr_internal_error
)
2342 free_regbyte_ctx_stack (ctx
, *rbc
);
2343 barray_destroy (&array
);
2344 return mr_internal_error
;
2354 status
= mr_not_matched
;
/* an error raised while matching the specifier is passed on to the caller */
2357 if (status
== mr_error_raised
)
2359 free_regbyte_ctx_stack (ctx
, *rbc
);
2360 barray_destroy (&array
);
2362 return mr_error_raised
;
/* for an op_and rule a specifier that did not match ends the attempt; the
 * visible lines either record the specifier's error text and return
 * mr_error_raised, or return mr_not_matched */
2365 if (ru
->m_oper
== op_and
&& status
!= mr_matched
&& status
!= mr_dont_emit
)
2367 free_regbyte_ctx_stack (ctx
, *rbc
);
2368 barray_destroy (&array
);
2372 set_last_error (sp
->m_errtext
->m_text
, error_get_token (sp
->m_errtext
, di
, text
,
2375 return mr_error_raised
;
2378 return mr_not_matched
;
/* on a match, record this specifier's emits and append the scratch output */
2381 if (status
== mr_matched
)
2384 if (barray_push (ba
, sp
->m_emits
, text
[ind
- 1], save_ind
, &ctx
))
2386 free_regbyte_ctx_stack (ctx
, *rbc
);
2387 barray_destroy (&array
);
2388 return mr_internal_error
;
2392 if (barray_append (ba
, &array
))
2394 free_regbyte_ctx_stack (ctx
, *rbc
);
2395 barray_destroy (&array
);
2396 return mr_internal_error
;
2400 barray_destroy (&array
);
2402 /* if the rule operator is a logical or, we pick up the first matching specifier */
2403 if (ru
->m_oper
== op_or
&& (status
== mr_matched
|| status
== mr_dont_emit
))
2413 /* everything went fine - all specifiers match up */
2414 if (ru
->m_oper
== op_and
&& (status
== mr_matched
|| status
== mr_dont_emit
))
/* nothing matched: drop the local register contexts and report failure */
2421 free_regbyte_ctx_stack (ctx
, *rbc
);
2422 return mr_not_matched
;
/*
 * error_get_token - extract the text of the token referenced by an error.
 *
 * Matches the rule er->m_token against text + ind using a temporary byte
 * array and an empty register context.  On a match, a buffer of
 * filter_index + 1 bytes is allocated and the first filter_index characters
 * at text + ind are copied into it and terminated with '\0'.  The temporary
 * byte array is destroyed afterwards.
 * NOTE(review): the rest of the match condition, the NULL checks and the
 * return statement are not part of this listing.
 */
2425 static byte
*error_get_token (error
*er
, dict
*di
, const byte
*text
, unsigned int ind
)
2432 unsigned int filter_index
= 0;
2433 regbyte_ctx
*ctx
= NULL
;
2435 barray_create (&ba
);
2438 if (match (di
, text
+ ind
, &filter_index
, er
->m_token
, &ba
, 0, &ctx
) == mr_matched
&&
2441 str
= mem_alloc (filter_index
+ 1);
/* copy the matched span and terminate it */
2444 str_copy_n (str
, text
+ ind
, filter_index
);
2445 str
[filter_index
] = '\0';
2448 barray_destroy (&ba
);
/*
 * State kept while a grammar is being loaded from text.
 * NOTE(review): only the two symbol-name members are visible here; the
 * functions below also use members named di, maps, mapb and mapr.
 */
2455 typedef struct grammar_load_state_
/* name of the root symbol */
2458 byte
*syntax_symbol
;
/* name of the string symbol, if one was declared */
2459 byte
*string_symbol
;
2463 } grammar_load_state
;
/*
 * grammar_load_state_create - allocate a grammar_load_state and store it in
 * *gr.  The two symbol names are initialised to NULL.
 * NOTE(review): the allocation check and the initialisation of the other
 * members are not part of this listing.
 */
2465 static void grammar_load_state_create (grammar_load_state
**gr
)
2467 *gr
= mem_alloc (sizeof (grammar_load_state
));
2471 (**gr
).syntax_symbol
= NULL
;
2472 (**gr
).string_symbol
= NULL
;
/*
 * grammar_load_state_destroy - release everything owned by *gr and then *gr
 * itself: the dictionary, both symbol names and the three maps.
 * NOTE(review): any guard on *gr being NULL is not part of this listing.
 */
2479 static void grammar_load_state_destroy (grammar_load_state
**gr
)
2483 dict_destroy (&(**gr
).di
);
2484 mem_free ((void **) &(**gr
).syntax_symbol
);
2485 mem_free ((void **) &(**gr
).string_symbol
);
2486 map_str_destroy (&(**gr
).maps
);
2487 map_byte_destroy (&(**gr
).mapb
);
2488 map_rule_destroy (&(**gr
).mapr
);
2489 mem_free ((void **) gr
);
/*
 * grammar_load_from_text - build a grammar from its textual description.
 *
 * NOTE(review): loop headers, braces, return statements and some
 * declarations are missing from this listing; the description follows the
 * visible lines.
 *
 * Visible steps:
 *  - clears the last error and creates the load state g and its dictionary;
 *  - reads the root symbol name into g->syntax_symbol;
 *  - for each following symbol: the dot-prefixed keywords "emtcode",
 *    "regbyte", "errtext" and "string" are handled by get_emtcode,
 *    get_regbyte, get_errtext and get_identifier respectively, with results
 *    appended to g->mapb, g->di->m_regbytes and g->maps or stored in
 *    g->string_symbol; anything else is parsed as a rule with get_rule,
 *    appended to g->di->m_rulez and registered in g->mapr;
 *  - resolves names with update_dependencies and appends the dictionary to
 *    g_dicts;
 *  - destroys the load state on every visible failure path and at the end.
 */
2497 grammar
grammar_load_from_text (const byte
*text
)
2499 grammar_load_state
*g
= NULL
;
2502 clear_last_error ();
2504 grammar_load_state_create (&g
);
2508 dict_create (&g
->di
);
2511 grammar_load_state_destroy (&g
);
2517 /* skip ".syntax" keyword */
2521 /* retrieve root symbol */
2522 if (get_identifier (&text
, &g
->syntax_symbol
))
2524 grammar_load_state_destroy (&g
);
2529 /* skip semicolon */
2535 byte
*symbol
= NULL
;
/* remember whether the symbol was introduced by a dot */
2536 int is_dot
= *text
== '.';
2541 if (get_identifier (&text
, &symbol
))
2543 grammar_load_state_destroy (&g
);
/* emit code definition: appended to g->mapb */
2549 if (is_dot
&& str_equal (symbol
, (byte
*) "emtcode"))
2551 map_byte
*ma
= NULL
;
2553 mem_free ((void **) &symbol
);
2555 if (get_emtcode (&text
, &ma
))
2557 grammar_load_state_destroy (&g
);
2561 map_byte_append (&g
->mapb
, &ma
);
/* register definition: appended to the dictionary's m_regbytes */
2564 else if (is_dot
&& str_equal (symbol
, (byte
*) "regbyte"))
2566 map_byte
*ma
= NULL
;
2568 mem_free ((void **) &symbol
);
2570 if (get_regbyte (&text
, &ma
))
2572 grammar_load_state_destroy (&g
);
2576 map_byte_append (&g
->di
->m_regbytes
, &ma
);
/* error text definition: appended to g->maps */
2579 else if (is_dot
&& str_equal (symbol
, (byte
*) "errtext"))
2583 mem_free ((void **) &symbol
);
2585 if (get_errtext (&text
, &ma
))
2587 grammar_load_state_destroy (&g
);
2591 map_str_append (&g
->maps
, &ma
);
/* string symbol declaration: its name is read into g->string_symbol */
2594 else if (is_dot
&& str_equal (symbol
, (byte
*) "string"))
2596 mem_free ((void **) &symbol
);
2598 if (g
->di
->m_string
!= NULL
)
2600 grammar_load_state_destroy (&g
);
2604 if (get_identifier (&text
, &g
->string_symbol
))
2606 grammar_load_state_destroy (&g
);
2610 /* skip semicolon */
/* anything else is a rule definition */
2618 map_rule
*ma
= NULL
;
2620 if (get_rule (&text
, &ru
, g
->maps
, g
->mapb
))
2622 grammar_load_state_destroy (&g
);
2626 rule_append (&g
->di
->m_rulez
, &ru
);
2628 /* if a rule consist of only one specifier, give it an ".and" operator */
2629 if (ru
->m_oper
== op_none
)
2630 ru
->m_oper
= op_and
;
2632 map_rule_create (&ma
);
2635 grammar_load_state_destroy (&g
);
2641 map_rule_append (&g
->mapr
, &ma
);
/* resolve all collected names into pointers */
2645 if (update_dependencies (g
->di
, g
->mapr
, &g
->syntax_symbol
, &g
->string_symbol
,
2648 grammar_load_state_destroy (&g
);
/* hand the finished dictionary over to the global list */
2652 dict_append (&g_dicts
, &g
->di
);
2656 grammar_load_state_destroy (&g
);
/*
 * grammar_set_reg8 - set a named byte register of the grammar id.
 *
 * Clears the last error, looks the grammar up in g_dicts with dict_find and
 * the register in di->m_regbytes with map_byte_locate.  The visible error
 * reports are INVALID_GRAMMAR_ID and INVALID_REGISTER_NAME (the latter with
 * a duplicate of name as its parameter).
 * NOTE(review): the checks that guard these reports, the store of value and
 * the return statements are not part of this listing.
 */
2661 int grammar_set_reg8 (grammar id
, const byte
*name
, byte value
)
2664 map_byte
*reg
= NULL
;
2666 clear_last_error ();
2668 dict_find (&g_dicts
, id
, &di
);
2671 set_last_error (INVALID_GRAMMAR_ID
, NULL
, -1);
2675 reg
= map_byte_locate (&di
->m_regbytes
, name
);
2678 set_last_error (INVALID_REGISTER_NAME
, str_duplicate (name
), -1);
/*
 * grammar_check - check text against the grammar id and hand back the
 * produced bytes.
 *
 * Clears the last error, finds the grammar in g_dicts and matches text
 * against the grammar's root rule (di->m_syntax) into a temporary byte
 * array.  When the result is not mr_matched, the array and the register
 * context stack are released.  Otherwise the context stack is released,
 * *prod is allocated with ba->len bytes, the array data is copied into it
 * and the array is destroyed.
 * NOTE(review): the NULL checks, the assignment to *size and the return
 * statements are not part of this listing.
 */
2686 int grammar_check (grammar id
, const byte
*text
, byte
**prod
, unsigned int *size
)
2690 unsigned int index
= 0;
2691 regbyte_ctx
*rbc
= NULL
;
2693 clear_last_error ();
2695 dict_find (&g_dicts
, id
, &di
);
2698 set_last_error (INVALID_GRAMMAR_ID
, NULL
, -1);
2702 barray_create (&ba
);
/* match the whole text against the root rule */
2709 if (match (di
, text
, &index
, di
->m_syntax
, &ba
, 0, &rbc
) != mr_matched
)
2711 barray_destroy (&ba
);
2712 free_regbyte_ctx_stack (rbc
, NULL
);
2716 free_regbyte_ctx_stack (rbc
, NULL
);
/* copy the produced bytes into a buffer stored in *prod */
2718 *prod
= mem_alloc (ba
->len
* sizeof (byte
));
2721 barray_destroy (&ba
);
2725 mem_copy (*prod
, ba
->data
, ba
->len
* sizeof (byte
));
2727 barray_destroy (&ba
);
/*
 * grammar_destroy - remove the grammar id from the global list g_dicts.
 *
 * di walks the list as a pointer to the link being examined.  When the
 * entry's m_id equals id, the link is redirected to the entry's successor
 * and dict_destroy is called on tmp; otherwise di advances to the next
 * link.  INVALID_GRAMMAR_ID is reported on the last visible line.
 * NOTE(review): the loop header, the declaration and assignment of tmp and
 * the return statements are not part of this listing.
 */
2732 int grammar_destroy (grammar id
)
2734 dict
**di
= &g_dicts
;
2736 clear_last_error ();
2740 if ((**di
).m_id
== id
)
/* unlink the entry, then destroy it */
2743 *di
= (**di
).m_next
;
2744 dict_destroy (&tmp
);
2748 di
= &(**di
).m_next
;
2751 set_last_error (INVALID_GRAMMAR_ID
, NULL
, -1);
/*
 * grammar_get_last_error - copy the last error message into text (a buffer
 * of size bytes) and store the error position in *pos.
 *
 * The message is read through p from error_message and the parameter
 * through r from error_param; characters from both are written with the
 * local APPEND_CHARACTER macro.  The macro stores a character and a
 * terminating '\0' while len < size - 1 and dots_made is still 0.
 * NOTE(review): the rest of the macro (the branch containing the
 * three-iteration loop), the loops over p and r and the test that switches
 * between them are not part of this listing.
 */
2755 void grammar_get_last_error (byte
*text
, unsigned int size
, int *pos
)
2757 unsigned int len
= 0, dots_made
= 0;
2758 const byte
*p
= error_message
;
2762 #define APPEND_CHARACTER(x) if (dots_made == 0) {\
2763 if (len < size - 1) {\
2764 text[len++] = (x); text[len] = '\0';\
2767 for (i = 0; i < 3; i++)\
2778 const byte
*r
= error_param
;
2782 APPEND_CHARACTER(*r
)
2790 APPEND_CHARACTER(*p
)
2794 *pos
= error_position
;
2796 #undef APPEND_CHARACTER