2 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * syntax parsing engine
31 #ifndef GRAMMAR_PORT_BUILD
32 #error Do not build this file directly, build your grammar_XXX.c instead, which includes this file
36 $Id: grammar.c,v 1.10 2004/12/08 14:00:46 alanh Exp $
43 The task is to check the syntax of an input string. Input string is a stream of ASCII
44 characters terminated with a null-character ('\0'). Checking it using C language is
45 difficult and hard to implement without bugs. It is hard to maintain and make changes when
48 This is because of a high redundancy of the C code. Large blocks of code are duplicated with
49 only small changes. Even use of macros does not solve the problem because macros cannot
50 erase the complexity of the problem.
52 The resolution is to create a new language that will be highly oriented to our task. Once
53 we describe a particular syntax, we are done. We can then focus on the code that implements
54 the language. Its size and complexity are relatively small compared to the code that directly
57 First, we must implement our new language. Here, the language is implemented in C, but it
58 could also be implemented in any other language. The code is listed below. We must take
59 good care that it is bug free. This is simple because the code is simple and clean.
61 Next, we must describe the syntax of our new language in itself. Once created and checked
62 manually that it is correct, we can use it to check other scripts.
64 Note that our new language loading code does not have to check the syntax. It is because we
65 assume that the script describing itself is correct, and other scripts can be syntactically
66 checked by the former script. The loading code must only do semantic checking which leads us to
67 simple resolving references.
72 Here I will describe the syntax of the new language (further called "Synek"). It is mainly a
73 sequence of declarations terminated by a semicolon. The declaration consists of a symbol,
74 which is an identifier, and its definition. A definition is in turn a sequence of specifiers
75 connected with ".and" or ".or" operator. These operators cannot be mixed together in one
76 definition. Specifier can be a symbol, string, character, character range or a special
77 keyword ".true" or ".false".
79 At the very beginning of the script there is a declaration of a root symbol, which is in the form:
80 .syntax <root_symbol>;
81 The <root_symbol> must be one of the symbols in declaration sequence. The syntax is correct if
82 the root symbol evaluates to true. A symbol evaluates to true if the definition associated with
83 the symbol evaluates to true. Definition evaluation depends on the operator used to connect
84 specifiers in the definition. If ".and" operator is used, definition evaluates to true if and
85 only if all the specifiers evaluate to true. If ".or" operator is used, definition evaluates to
86 true if any of the specifiers evaluates to true. If definition contains only one specifier,
87 it is evaluated as if it was connected with ".true" keyword by ".and" operator.
89 If specifier is a ".true" keyword, it always evaluates to true.
91 If specifier is a ".false" keyword, it always evaluates to false. Specifier evaluates to false
92 when it does not evaluate to true.
94 Character range specifier is in the form:
95 '<first_character>' - '<second_character>'
96 If specifier is a character range, it evaluates to true if character in the stream is greater
97 or equal to <first_character> and less or equal to <second_character>. In that situation
98 the stream pointer is advanced to point to next character in the stream. All C-style escape
99 sequences are supported although trigraph sequences are not. The comparisons are performed
100 on 8-bit unsigned integers.
102 Character specifier is in the form:
104 It evaluates to true if the following character range specifier evaluates to true:
105 '<single_character>' - '<single_character>'
107 String specifier is in the form:
109 Let N be the number of characters in <string>. Let <string>[i] designate i-th character in
110 <string>. Then the string specifier evaluates to true if and only if for i in the range [0, N)
111 the following character specifier evaluates to true:
113 If <string>[i] is a quotation mark, '<string>[i]' is replaced with '\<string>[i]'.
115 Symbol specifier can be optionally preceded by a ".loop" keyword in the form:
117 where <symbol> is defined as follows:
118 <symbol> <definition>; (2)
119 Construction (1) is replaced by the following code:
121 and declaration (2) is replaced by the following:
122 <symbol$1> <symbol$2> .or .true;
123 <symbol$2> <symbol> .and <symbol$1>;
124 <symbol> <definition>;
126 Synek also supports a register mechanism. The user can, in their SYN file, declare a number of
127 registers that can be accessed in the syn body. Each reg has its name and a default value.
128 The register is one byte wide. The C code can change the default value by calling
129 grammar_set_reg8() with grammar id, register name and a new value. As we know, each rule is
130 a sequence of specifiers joined with .and or .or operator. And now each specifier can be
131 prefixed with a condition expression in a form ".if (<reg_name> <operator> <hex_literal>)"
132 where <operator> can be == or !=. If the condition evaluates to false, the specifier
133 evaluates to .false. Otherwise it evaluates to the specifier.
138 Synek supports all escape sequences in character specifiers. The mapping table is listed below.
139 All occurrences of the characters in the first column are replaced with the corresponding
140 character in the second column.
142 Escape sequence Represents
143 ------------------------------------------------------------------------------------------------
151 \' Single quotation mark
152 \" Double quotation mark
154 \? Literal question mark
155 \ooo ASCII character in octal notation
156 \xhhh ASCII character in hexadecimal notation
157 ------------------------------------------------------------------------------------------------
162 Any specifier can be followed by a special construction that is executed when the specifier
163 evaluates to false. The construction is in the form:
165 <ERROR_TEXT> is an identifier declared earlier by error text declaration. The declaration is
167 .errtext <ERROR_TEXT> "<error_desc>"
168 When specifier evaluates to false and this construction is present, parsing is stopped
169 immediately and <error_desc> is returned as a result of parsing. The error position is also
170 returned and it is meant as an offset from the beginning of the stream to the character that
171 was valid so far. Example:
173 (**** syntax script ****)
176 .errtext MISSING_SEMICOLON "missing ';'"
177 program declaration .and .loop space .and ';' .error MISSING_SEMICOLON .and
178 .loop space .and '\0';
179 declaration "declare" .and .loop space .and identifier;
182 (**** sample code ****)
186 In the example above checking the sample code will result in error message "missing ';'" and
187 error position 12. The sample code is not correct. Note the presence of '\0' specifier to
188 assure that there is no code after semicolon - only spaces.
189 <error_desc> can optionally contain identifier surrounded by dollar signs $. In such a case,
190 the identifier and dollar signs are replaced by a string retrieved by invoking symbol with
191 the identifier name. The starting position is the error position. The length of the resulting
192 string is the position after invoking the symbol.
197 Synek not only checks the syntax but it can also produce (emit) bytes associated with specifiers
198 that evaluate to true. That is, every specifier and optional error construction can be followed
199 by a number of emit constructions that are in the form:
201 <parameter> can be a HEX number, identifier, a star * or a dollar $. HEX number is preceded by
202 0x or 0X. If <parameter> is an identifier, it must be earlier declared by emit code declaration
204 .emtcode <identifier> <hex_number>
206 When given specifier evaluates to true, all emits associated with the specifier are output
207 in order they were declared. A star means that last-read character should be output instead
208 of constant value. Example:
210 (**** syntax script ****)
213 .emtcode WORD_FOO 0x01
214 .emtcode WORD_BAR 0x02
215 foobar FOO .emit WORD_FOO .or BAR .emit WORD_BAR .or .true .emit 0x00;
216 FOO "foo" .and SPACE;
217 BAR "bar" .and SPACE;
220 (**** sample text 1 ****)
224 (**** sample text 2 ****)
228 For both samples the result will be one-element array. For first sample text it will be
229 value 1, for second - 0. Note that every text will be accepted because of presence of
230 .true as an alternative.
234 (**** syntax script ****)
237 .emtcode VARIABLE 0x01
238 declaration "declare" .and .loop space .and
239 identifier .emit VARIABLE .and (1)
240 .true .emit 0x00 .and (2)
241 .loop space .and ';';
243 identifier .loop id_char .emit *; (3)
244 id_char 'a'-'z' .or 'A'-'Z' .or '_';
246 (**** sample code ****)
250 In specifier (1) symbol <identifier> is followed by .emit VARIABLE. If it evaluates to
251 true, VARIABLE constant and then production of the symbol is output. Specifier (2) is used
252 to terminate the string with null to signal when the string ends. Specifier (3) outputs
253 all characters that make declared identifier. The result of sample code will be the
255 { 1, 'f', 'u', 'b', 'a', 'r', 0 }
257 If .emit is followed by dollar $, it means that current position should be output. Current
258 position is a 32-bit unsigned integer distance from the very beginning of the parsed string to
259 first character consumed by the specifier associated with the .emit instruction. Current
260 position is stored in the output buffer in Little-Endian convention (the lowest byte comes
264 static void mem_free (void **);
267 internal error messages
269 static const byte
*OUT_OF_MEMORY
= (byte
*) "internal error 1001: out of physical memory";
270 static const byte
*UNRESOLVED_REFERENCE
= (byte
*) "internal error 1002: unresolved reference '$'";
271 static const byte
*INVALID_GRAMMAR_ID
= (byte
*) "internal error 1003: invalid grammar object";
272 static const byte
*INVALID_REGISTER_NAME
= (byte
*) "internal error 1004: invalid register name: '$'";
273 static const byte
*DUPLICATE_IDENTIFIER
= (byte
*) "internal error 1005: identifier '$' already defined";
274 static const byte
*UNREFERENCED_IDENTIFIER
=(byte
*) "internal error 1006: unreferenced identifier '$'";
276 static const byte
*error_message
= NULL
; /* points to one of the error messages above */
277 static byte
*error_param
= NULL
; /* this is inserted into error_message in place of $ */
278 static int error_position
= -1;
280 static byte
*unknown
= (byte
*) "???";
282 static void clear_last_error (void)
284 /* reset error message */
285 error_message
= NULL
;
287 /* free error parameter - if error_param is a "???" don't free it - it's static */
288 if (error_param
!= unknown
)
289 mem_free ((void **) (void *) &error_param
);
293 /* reset error position */
297 static void set_last_error (const byte
*msg
, byte
*param
, int pos
)
299 /* error message can be set only once */
300 if (error_message
!= NULL
)
302 mem_free ((void **) (void *) ¶m
);
308 /* if param is NULL, set error_param to unknown ("???") */
309 /* note: do not try to strdup the "???" - it may be that we are here because of */
310 /* out of memory error so strdup can fail */
314 error_param
= unknown
;
316 error_position
= pos
;
320 memory management routines
322 static void *mem_alloc (size_t size
)
324 void *ptr
= grammar_alloc_malloc (size
);
326 set_last_error (OUT_OF_MEMORY
, NULL
, -1);
/*
    thin wrapper over the port-supplied grammar_memory_copy():
    forwards dst, src and size unchanged and returns that routine's result
*/
static void *mem_copy (void *dst, const void *src, size_t size)
{
    return grammar_memory_copy (dst, src, size);
}
335 static void mem_free (void **ptr
)
337 grammar_alloc_free (*ptr
);
341 static void *mem_realloc (void *ptr
, size_t old_size
, size_t new_size
)
343 void *ptr2
= grammar_alloc_realloc (ptr
, old_size
, new_size
);
345 set_last_error (OUT_OF_MEMORY
, NULL
, -1);
349 static byte
*str_copy_n (byte
*dst
, const byte
*src
, size_t max_len
)
351 return grammar_string_copy_n (dst
, src
, max_len
);
354 static byte
*str_duplicate (const byte
*str
)
356 byte
*new_str
= grammar_string_duplicate (str
);
358 set_last_error (OUT_OF_MEMORY
, NULL
, -1);
362 static int str_equal (const byte
*str1
, const byte
*str2
)
364 return grammar_string_compare (str1
, str2
) == 0;
367 static int str_equal_n (const byte
*str1
, const byte
*str2
, unsigned int n
)
369 return grammar_string_compare_n (str1
, str2
, n
) == 0;
372 static unsigned int str_length (const byte
*str
)
374 return grammar_string_length (str
);
380 #define GRAMMAR_IMPLEMENT_LIST_APPEND(_Ty)\
381 static void _Ty##_append (_Ty **x, _Ty *nx) {\
382 while (*x) x = &(**x).next;\
387 string to byte map typedef
389 typedef struct map_byte_
393 struct map_byte_
*next
;
396 static void map_byte_create (map_byte
**ma
)
398 *ma
= (map_byte
*) mem_alloc (sizeof (map_byte
));
407 static void map_byte_destroy (map_byte
**ma
)
411 map_byte_destroy (&(**ma
).next
);
412 mem_free ((void **) &(**ma
).key
);
413 mem_free ((void **) ma
);
417 GRAMMAR_IMPLEMENT_LIST_APPEND(map_byte
)
420 searches the map for the specified key,
421 returns pointer to the element with the specified key if it exists
422 returns NULL otherwise
424 static map_byte
*map_byte_locate (map_byte
**ma
, const byte
*key
)
428 if (str_equal ((**ma
).key
, key
))
434 set_last_error (UNRESOLVED_REFERENCE
, str_duplicate (key
), -1);
439 searches the map for specified key,
440 if the key is matched, *data is filled with data associated with the key,
441 returns 0 if the key is matched,
444 static int map_byte_find (map_byte
**ma
, const byte
*key
, byte
*data
)
446 map_byte
*found
= map_byte_locate (ma
, key
);
458 regbyte context typedef
460 Each regbyte consists of its name and a default value. These are static and created at
461 grammar script compile-time, for example the following line:
462 .regbyte vertex_blend 0x00
463 adds a new regbyte named "vertex_blend" to the static list and initializes it to 0.
464 When the script is executed, this regbyte can be accessed by name for read and write. When a
465 particular regbyte is written, a new regbyte_ctx entry is added to the top of the regbyte_ctx
466 stack. The new entry contains information about which regbyte it references and its new value.
467 When a given regbyte is accessed for read, the stack is searched top-down to find an
468 entry that references the regbyte. The first matching entry is used to return the current
469 value it holds. If no entry is found, the default value is returned.
471 typedef struct regbyte_ctx_
474 byte m_current_value
;
475 struct regbyte_ctx_
*m_prev
;
478 static void regbyte_ctx_create (regbyte_ctx
**re
)
480 *re
= (regbyte_ctx
*) mem_alloc (sizeof (regbyte_ctx
));
483 (**re
).m_regbyte
= NULL
;
484 (**re
).m_prev
= NULL
;
488 static void regbyte_ctx_destroy (regbyte_ctx
**re
)
492 mem_free ((void **) re
);
496 static byte
regbyte_ctx_extract (regbyte_ctx
**re
, map_byte
*reg
)
498 /* first lookup in the register stack */
501 if ((**re
).m_regbyte
== reg
)
502 return (**re
).m_current_value
;
507 /* if not found - return the default value */
514 typedef enum emit_type_
516 et_byte
, /* explicit number */
517 et_stream
, /* eaten character */
518 et_position
/* current position */
522 emit destination typedef
524 typedef enum emit_dest_
526 ed_output
, /* write to the output buffer */
527 ed_regbyte
/* write a particular regbyte */
535 emit_dest m_emit_dest
;
536 emit_type m_emit_type
; /* ed_output */
537 byte m_byte
; /* et_byte */
538 map_byte
*m_regbyte
; /* ed_regbyte */
539 byte
*m_regname
; /* ed_regbyte - temporary */
540 struct emit_
*m_next
;
543 static void emit_create (emit
**em
)
545 *em
= (emit
*) mem_alloc (sizeof (emit
));
548 (**em
).m_emit_dest
= ed_output
;
549 (**em
).m_emit_type
= et_byte
;
550 (**em
).m_byte
= '\0';
551 (**em
).m_regbyte
= NULL
;
552 (**em
).m_regname
= NULL
;
553 (**em
).m_next
= NULL
;
557 static void emit_destroy (emit
**em
)
561 emit_destroy (&(**em
).m_next
);
562 mem_free ((void **) &(**em
).m_regname
);
563 mem_free ((void **) em
);
567 static unsigned int emit_size (emit
*_E
)
573 if (_E
->m_emit_dest
== ed_output
)
575 if (_E
->m_emit_type
== et_position
)
576 _N
+= 4; /* position is a 32-bit unsigned integer */
586 static int emit_push (emit
*_E
, byte
*_P
, byte _C
, unsigned int _Pos
, regbyte_ctx
**_Ctx
)
590 if (_E
->m_emit_dest
== ed_output
)
592 if (_E
->m_emit_type
== et_byte
)
594 else if (_E
->m_emit_type
== et_stream
)
596 else /* _Em->type == et_position */
598 *_P
++ = (byte
) (_Pos
);
599 *_P
++ = (byte
) (_Pos
>> 8);
600 *_P
++ = (byte
) (_Pos
>> 16);
601 *_P
++ = (byte
) (_Pos
>> 24);
606 regbyte_ctx
*new_rbc
;
607 regbyte_ctx_create (&new_rbc
);
611 new_rbc
->m_prev
= *_Ctx
;
612 new_rbc
->m_regbyte
= _E
->m_regbyte
;
615 if (_E
->m_emit_type
== et_byte
)
616 new_rbc
->m_current_value
= _E
->m_byte
;
617 else if (_E
->m_emit_type
== et_stream
)
618 new_rbc
->m_current_value
= _C
;
630 typedef struct error_
634 struct rule_
*m_token
;
637 static void error_create (error
**er
)
639 *er
= (error
*) mem_alloc (sizeof (error
));
642 (**er
).m_text
= NULL
;
643 (**er
).m_token_name
= NULL
;
644 (**er
).m_token
= NULL
;
648 static void error_destroy (error
**er
)
652 mem_free ((void **) &(**er
).m_text
);
653 mem_free ((void **) &(**er
).m_token_name
);
654 mem_free ((void **) er
);
659 static byte
*error_get_token (error
*, struct dict_
*, const byte
*, unsigned int);
662 condition operand type typedef
664 typedef enum cond_oper_type_
666 cot_byte
, /* constant 8-bit unsigned integer */
667 cot_regbyte
/* pointer to byte register containing the current value */
671 condition operand typedef
673 typedef struct cond_oper_
675 cond_oper_type m_type
;
676 byte m_byte
; /* cot_byte */
677 map_byte
*m_regbyte
; /* cot_regbyte */
678 byte
*m_regname
; /* cot_regbyte - temporary */
682 condition type typedef
684 typedef enum cond_type_
696 cond_oper m_operands
[2];
699 static void cond_create (cond
**co
)
701 *co
= (cond
*) mem_alloc (sizeof (cond
));
704 (**co
).m_operands
[0].m_regname
= NULL
;
705 (**co
).m_operands
[1].m_regname
= NULL
;
709 static void cond_destroy (cond
**co
)
713 mem_free ((void **) &(**co
).m_operands
[0].m_regname
);
714 mem_free ((void **) &(**co
).m_operands
[1].m_regname
);
715 mem_free ((void **) co
);
720 specifier type typedef
722 typedef enum spec_type_
739 spec_type m_spec_type
;
740 byte m_byte
[2]; /* st_byte, st_byte_range */
741 byte
*m_string
; /* st_string */
742 struct rule_
*m_rule
; /* st_identifier, st_identifier_loop */
749 static void spec_create (spec
**sp
)
751 *sp
= (spec
*) mem_alloc (sizeof (spec
));
754 (**sp
).m_spec_type
= st_false
;
755 (**sp
).m_byte
[0] = '\0';
756 (**sp
).m_byte
[1] = '\0';
757 (**sp
).m_string
= NULL
;
758 (**sp
).m_rule
= NULL
;
759 (**sp
).m_emits
= NULL
;
760 (**sp
).m_errtext
= NULL
;
761 (**sp
).m_cond
= NULL
;
766 static void spec_destroy (spec
**sp
)
770 spec_destroy (&(**sp
).next
);
771 emit_destroy (&(**sp
).m_emits
);
772 error_destroy (&(**sp
).m_errtext
);
773 mem_free ((void **) &(**sp
).m_string
);
774 cond_destroy (&(**sp
).m_cond
);
775 mem_free ((void **) sp
);
779 GRAMMAR_IMPLEMENT_LIST_APPEND(spec
)
802 static void rule_create (rule
**ru
)
804 *ru
= (rule
*) mem_alloc (sizeof (rule
));
807 (**ru
).m_oper
= op_none
;
808 (**ru
).m_specs
= NULL
;
810 (**ru
).m_referenced
= 0;
814 static void rule_destroy (rule
**ru
)
818 rule_destroy (&(**ru
).next
);
819 spec_destroy (&(**ru
).m_specs
);
820 mem_free ((void **) ru
);
824 GRAMMAR_IMPLEMENT_LIST_APPEND(rule
)
827 returns unique grammar id
829 static grammar
next_valid_grammar_id (void)
831 static grammar id
= 0;
844 map_byte
*m_regbytes
;
849 static void dict_create (dict
**di
)
851 *di
= (dict
*) mem_alloc (sizeof (dict
));
854 (**di
).m_rulez
= NULL
;
855 (**di
).m_syntax
= NULL
;
856 (**di
).m_string
= NULL
;
857 (**di
).m_regbytes
= NULL
;
858 (**di
).m_id
= next_valid_grammar_id ();
863 static void dict_destroy (dict
**di
)
867 rule_destroy (&(**di
).m_rulez
);
868 map_byte_destroy (&(**di
).m_regbytes
);
869 mem_free ((void **) di
);
873 GRAMMAR_IMPLEMENT_LIST_APPEND(dict
)
875 static void dict_find (dict
**di
, grammar key
, dict
**data
)
879 if ((**di
).m_id
== key
)
891 static dict
*g_dicts
= NULL
;
896 typedef struct barray_
902 static void barray_create (barray
**ba
)
904 *ba
= (barray
*) mem_alloc (sizeof (barray
));
912 static void barray_destroy (barray
**ba
)
916 mem_free ((void **) &(**ba
).data
);
917 mem_free ((void **) ba
);
922 reallocates byte array to requested size,
923 returns 0 on success,
926 static int barray_resize (barray
**ba
, unsigned int nlen
)
932 mem_free ((void **) &(**ba
).data
);
940 new_pointer
= (byte
*) mem_realloc ((**ba
).data
, (**ba
).len
* sizeof (byte
),
941 nlen
* sizeof (byte
));
944 (**ba
).data
= new_pointer
;
955 adds byte array pointed by *nb to the end of array pointed by *ba,
956 returns 0 on success,
959 static int barray_append (barray
**ba
, barray
**nb
)
961 const unsigned int len
= (**ba
).len
;
963 if (barray_resize (ba
, (**ba
).len
+ (**nb
).len
))
966 mem_copy ((**ba
).data
+ len
, (**nb
).data
, (**nb
).len
);
972 adds emit chain pointed by em to the end of array pointed by *ba,
973 returns 0 on success,
976 static int barray_push (barray
**ba
, emit
*em
, byte c
, unsigned int pos
, regbyte_ctx
**rbc
)
978 unsigned int count
= emit_size (em
);
980 if (barray_resize (ba
, (**ba
).len
+ count
))
983 return emit_push (em
, (**ba
).data
+ ((**ba
).len
- count
), c
, pos
, rbc
);
989 typedef struct bytepool_
995 static void bytepool_destroy (bytepool
**by
)
999 mem_free ((void **) &(**by
)._F
);
1000 mem_free ((void **) by
);
1004 static void bytepool_create (bytepool
**by
, int len
)
1006 *by
= (bytepool
*) (mem_alloc (sizeof (bytepool
)));
1009 (**by
)._F
= (byte
*) (mem_alloc (sizeof (byte
) * len
));
1012 if ((**by
)._F
== NULL
)
1013 bytepool_destroy (by
);
1017 static int bytepool_reserve (bytepool
*by
, unsigned int _N
)
1024 /* byte pool can only grow and at least by doubling its size */
1025 _N
= _N
>= by
->_Siz
* 2 ? _N
: by
->_Siz
* 2;
1027 /* reallocate the memory and adjust pointers to the new memory location */
1028 _P
= (byte
*) (mem_realloc (by
->_F
, sizeof (byte
) * by
->_Siz
, sizeof (byte
) * _N
));
1040 string to string map typedef
1042 typedef struct map_str_
1046 struct map_str_
*next
;
1049 static void map_str_create (map_str
**ma
)
1051 *ma
= (map_str
*) mem_alloc (sizeof (map_str
));
1060 static void map_str_destroy (map_str
**ma
)
1064 map_str_destroy (&(**ma
).next
);
1065 mem_free ((void **) &(**ma
).key
);
1066 mem_free ((void **) &(**ma
).data
);
1067 mem_free ((void **) ma
);
1071 GRAMMAR_IMPLEMENT_LIST_APPEND(map_str
)
1074 searches the map for specified key,
1075 if the key is matched, *data is filled with data associated with the key,
1076 returns 0 if the key is matched,
1079 static int map_str_find (map_str
**ma
, const byte
*key
, byte
**data
)
1083 if (str_equal ((**ma
).key
, key
))
1085 *data
= str_duplicate ((**ma
).data
);
1095 set_last_error (UNRESOLVED_REFERENCE
, str_duplicate (key
), -1);
1100 string to rule map typedef
1102 typedef struct map_rule_
1106 struct map_rule_
*next
;
1109 static void map_rule_create (map_rule
**ma
)
1111 *ma
= (map_rule
*) mem_alloc (sizeof (map_rule
));
1120 static void map_rule_destroy (map_rule
**ma
)
1124 map_rule_destroy (&(**ma
).next
);
1125 mem_free ((void **) &(**ma
).key
);
1126 mem_free ((void **) ma
);
1130 GRAMMAR_IMPLEMENT_LIST_APPEND(map_rule
)
1133 searches the map for specified key,
1134 if the key is matched, *data is filled with data associated with the key,
1135 returns 0 if the key is matched,
1138 static int map_rule_find (map_rule
**ma
, const byte
*key
, rule
**data
)
1142 if (str_equal ((**ma
).key
, key
))
1144 *data
= (**ma
).data
;
1152 set_last_error (UNRESOLVED_REFERENCE
, str_duplicate (key
), -1);
/*
    returns 1 if given character is a white space
    (space, horizontal tab, line feed or carriage return),
    returns 0 otherwise
*/
static int is_space (byte c)
{
    return c == ' ' || c == '\t' || c == '\n' || c == '\r';
}
1166 advances text pointer by 1 if character pointed by *text is a space,
1167 returns 1 if a space has been eaten,
1170 static int eat_space (const byte
**text
)
1172 if (is_space (**text
))
/*
    returns 1 if text points to C-style comment start string,
    returns 0 otherwise

    text[1] is only read when text[0] is '/', so an empty string is safe
*/
static int is_comment_start (const byte *text)
{
    return text[0] == '/' && text[1] == '*';
}
1192 advances text pointer to first character after C-style comment block - if any,
1193 returns 1 if C-style comment block has been encountered and eaten,
1196 static int eat_comment (const byte
**text
)
1198 if (is_comment_start (*text
))
1200 /* *text points to comment block - skip two characters to enter comment body */
1202 /* skip any character except consecutive '*' and '/' */
1203 while (!((*text
)[0] == '*' && (*text
)[1] == '/'))
1205 /* skip those two terminating characters */
1215 advances text pointer to first character that is neither space nor C-style comment block
1217 static void eat_spaces (const byte
**text
)
1219 while (eat_space (text
) || eat_comment (text
))
1224 resizes string pointed by *ptr to successfully add character c to the end of the string,
1225 returns 0 on success,
1228 static int string_grow (byte
**ptr
, unsigned int *len
, byte c
)
1230 /* reallocate the string in 16-byte increments */
1231 if ((*len
& 0x0F) == 0x0F || *ptr
== NULL
)
1233 byte
*tmp
= (byte
*) mem_realloc (*ptr
, ((*len
+ 1) & ~0x0F) * sizeof (byte
),
1234 ((*len
+ 1 + 0x10) & ~0x0F) * sizeof (byte
));
1243 /* append given character */
1247 (*ptr
)[*len
] = '\0';
/*
    returns 1 if given character is a valid identifier character a-z, A-Z, 0-9 or _
    returns 0 otherwise
*/
static int is_identifier (byte c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_';
}
1262 copies characters from *text to *id until non-identifier character is encountered,
1263 assumes that *id points to NULL object - caller is responsible for later freeing the string,
1264 text pointer is advanced to point past the copied identifier,
1265 returns 0 if identifier was successfully copied,
1268 static int get_identifier (const byte
**text
, byte
**id
)
1270 const byte
*t
= *text
;
1272 unsigned int len
= 0;
1274 if (string_grow (&p
, &len
, '\0'))
1277 /* loop while next character in buffer is valid for identifiers */
1278 while (is_identifier (*t
))
1280 if (string_grow (&p
, &len
, *t
++))
1282 mem_free ((void **) (void *) &p
);
/*
    converts sequence of DEC digits pointed by *text until non-DEC digit is encountered,
    advances text pointer past the converted sequence,
    returns the converted value (0 if *text does not start with a DEC digit)
*/
static unsigned int dec_convert (const byte **text)
{
    unsigned int value = 0;

    while (**text >= '0' && **text <= '9')
    {
        value = value * 10 + **text - '0';
        (*text)++;
    }

    return value;
}
/*
    returns 1 if given character is HEX digit 0-9, A-F or a-f,
    returns 0 otherwise
*/
static int is_hex (byte c)
{
    return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
}
/*
    returns value of passed character as if it was HEX digit

    the character is not validated: anything that is neither 0-9 nor A-F is
    treated as a lower-case digit a-f
*/
static unsigned int hex2dec (byte c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return c - 'a' + 10;
}
1333 converts sequence of HEX digits pointed by *text until non-HEX digit is encountered,
1334 advances text pointer past the converted sequence,
1335 returns the converted value
1337 static unsigned int hex_convert (const byte
**text
)
1339 unsigned int value
= 0;
1341 while (is_hex (**text
))
1343 value
= value
* 0x10 + hex2dec (**text
);
/*
    returns 1 if given character is OCT digit 0-7,
    returns 0 otherwise
*/
static int is_oct (byte c)
{
    return c >= '0' && c <= '7';
}
1360 returns value of passed character as if it was OCT digit
1362 static int oct2dec (byte c
)
1367 static byte
get_escape_sequence (const byte
**text
)
1371 /* skip '\' character */
1399 return (byte
) hex_convert (text
);
1403 if (is_oct (**text
))
1405 value
= oct2dec (*(*text
)++);
1406 if (is_oct (**text
))
1408 value
= value
* 010 + oct2dec (*(*text
)++);
1409 if (is_oct (**text
))
1410 value
= value
* 010 + oct2dec (*(*text
)++);
1414 return (byte
) value
;
1418 copies characters from *text to *str until " or ' character is encountered,
1419 assumes that *str points to NULL object - caller is responsible for later freeing the string,
1420 assumes that *text points to " or ' character that starts the string,
1421 text pointer is advanced to point past the " or ' character,
1422 returns 0 if string was successfully copied,
1425 static int get_string (const byte
**text
, byte
**str
)
1427 const byte
*t
= *text
;
1429 unsigned int len
= 0;
1432 if (string_grow (&p
, &len
, '\0'))
1435 /* read " or ' character that starts the string */
1437 /* while next character is not the terminating character */
1438 while (*t
&& *t
!= term_char
)
1443 c
= get_escape_sequence (&t
);
1447 if (string_grow (&p
, &len
, c
))
1449 mem_free ((void **) (void *) &p
);
1453 /* skip " or ' character that ends the string */
1462 gets emit code, the syntax is:
1463 ".emtcode" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
1464 assumes that *text already points to <symbol>,
1465 returns 0 if emit code is successfully read,
1468 static int get_emtcode (const byte
**text
, map_byte
**ma
)
1470 const byte
*t
= *text
;
1473 map_byte_create (&m
);
1477 if (get_identifier (&t
, &m
->key
))
1479 map_byte_destroy (&m
);
1488 if (get_string (&t
, &c
))
1490 map_byte_destroy (&m
);
1494 m
->data
= (byte
) c
[0];
1495 mem_free ((void **) (void *) &c
);
1497 else if (t
[0] == '0' && (t
[1] == 'x' || t
[1] == 'X'))
1499 /* skip HEX "0x" or "0X" prefix */
1501 m
->data
= (byte
) hex_convert (&t
);
1505 m
->data
= (byte
) dec_convert (&t
);
1516 gets regbyte declaration, the syntax is:
1517 ".regbyte" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
1518 assumes that *text already points to <symbol>,
1519 returns 0 if regbyte is successfully read,
1522 static int get_regbyte (const byte
**text
, map_byte
**ma
)
1524 /* pass it to the emtcode parser as it has the same syntax starting at <symbol> */
1525 return get_emtcode (text
, ma
);
1529 returns 0 on success,
1532 static int get_errtext (const byte
**text
, map_str
**ma
)
1534 const byte
*t
= *text
;
1537 map_str_create (&m
);
1541 if (get_identifier (&t
, &m
->key
))
1543 map_str_destroy (&m
);
1548 if (get_string (&t
, &m
->data
))
1550 map_str_destroy (&m
);
1561 returns 0 on success,
1562 returns 1 otherwise,
1564 static int get_error (const byte
**text
, error
**er
, map_str
*maps
)
1566 const byte
*t
= *text
;
1573 if (get_identifier (&t
, &temp
))
1577 if (!str_equal ((byte
*) "error", temp
))
1579 mem_free ((void **) (void *) &temp
);
1583 mem_free ((void **) (void *) &temp
);
1591 if (get_string (&t
, &(**er
).m_text
))
1600 if (get_identifier (&t
, &temp
))
1607 if (map_str_find (&maps
, temp
, &(**er
).m_text
))
1609 mem_free ((void **) (void *) &temp
);
1614 mem_free ((void **) (void *) &temp
);
1617 /* try to extract "token" from "...$token$..." */
1619 byte
*processed
= NULL
;
1620 unsigned int len
= 0, i
= 0;
1622 if (string_grow (&processed
, &len
, '\0'))
1628 while (i
< str_length ((**er
).m_text
))
1630 /* check if the dollar sign is repeated - if so skip it */
1631 if ((**er
).m_text
[i
] == '$' && (**er
).m_text
[i
+ 1] == '$')
1633 if (string_grow (&processed
, &len
, '$'))
1635 mem_free ((void **) (void *) &processed
);
1642 else if ((**er
).m_text
[i
] != '$')
1644 if (string_grow (&processed
, &len
, (**er
).m_text
[i
]))
1646 mem_free ((void **) (void *) &processed
);
1655 if (string_grow (&processed
, &len
, '$'))
1657 mem_free ((void **) (void *) &processed
);
1663 /* length of token being extracted */
1664 unsigned int tlen
= 0;
1666 if (string_grow (&(**er
).m_token_name
, &tlen
, '\0'))
1668 mem_free ((void **) (void *) &processed
);
1673 /* skip the dollar sign */
1676 while ((**er
).m_text
[i
] != '$')
1678 if (string_grow (&(**er
).m_token_name
, &tlen
, (**er
).m_text
[i
]))
1680 mem_free ((void **) (void *) &processed
);
1688 /* skip the dollar sign */
1694 mem_free ((void **) &(**er
).m_text
);
1695 (**er
).m_text
= processed
;
1703 returns 0 on success,
1704 returns 1 otherwise,
1706 static int get_emits (const byte
**text
, emit
**em
, map_byte
*mapb
)
1708 const byte
*t
= *text
;
1717 if (get_identifier (&t
, &temp
))
1722 if (str_equal ((byte
*) "emit", temp
))
1725 else if (str_equal ((byte
*) "load", temp
))
1729 mem_free ((void **) (void *) &temp
);
1733 mem_free ((void **) (void *) &temp
);
1739 e
->m_emit_dest
= dest
;
1741 if (dest
== ed_regbyte
)
1743 if (get_identifier (&t
, &e
->m_regname
))
1752 if (*t
== '0' && (t
[1] == 'x' || t
[1] == 'X'))
1755 e
->m_byte
= (byte
) hex_convert (&t
);
1757 e
->m_emit_type
= et_byte
;
1760 else if (*t
>= '0' && *t
<= '9')
1762 e
->m_byte
= (byte
) dec_convert (&t
);
1764 e
->m_emit_type
= et_byte
;
1771 e
->m_emit_type
= et_stream
;
1778 e
->m_emit_type
= et_position
;
1781 else if (*t
== '\'')
1783 if (get_string (&t
, &temp
))
1788 e
->m_byte
= (byte
) temp
[0];
1790 mem_free ((void **) (void *) &temp
);
1792 e
->m_emit_type
= et_byte
;
1796 if (get_identifier (&t
, &temp
))
1802 if (map_byte_find (&mapb
, temp
, &e
->m_byte
))
1804 mem_free ((void **) (void *) &temp
);
1809 mem_free ((void **) (void *) &temp
);
1811 e
->m_emit_type
= et_byte
;
1816 if (get_emits (&t
, &e
->m_next
, mapb
))
1828 returns 0 on success,
1829 returns 1 otherwise,
1831 static int get_spec (const byte
**text
, spec
**sp
, map_str
*maps
, map_byte
*mapb
)
1833 const byte
*t
= *text
;
1840 /* first - read optional .if statement */
1844 byte
*keyword
= NULL
;
1849 if (get_identifier (&u
, &keyword
))
1856 if (str_equal ((byte
*) "if", keyword
))
1858 cond_create (&s
->m_cond
);
1859 if (s
->m_cond
== NULL
)
1865 /* skip the left paren */
1869 /* get the left operand */
1871 if (get_identifier (&u
, &s
->m_cond
->m_operands
[0].m_regname
))
1876 s
->m_cond
->m_operands
[0].m_type
= cot_regbyte
;
1878 /* get the operator (!= or ==) */
1881 s
->m_cond
->m_type
= ct_not_equal
;
1883 s
->m_cond
->m_type
= ct_equal
;
1887 if (u
[0] == '0' && (u
[1] == 'x' || u
[1] == 'X'))
1889 /* skip the 0x prefix */
1892 /* get the right operand */
1893 s
->m_cond
->m_operands
[1].m_byte
= hex_convert (&u
);
1894 s
->m_cond
->m_operands
[1].m_type
= cot_byte
;
1896 else /*if (*u >= '0' && *u <= '9')*/
1898 /* get the right operand */
1899 s
->m_cond
->m_operands
[1].m_byte
= dec_convert (&u
);
1900 s
->m_cond
->m_operands
[1].m_type
= cot_byte
;
1903 /* skip the right paren */
1912 mem_free ((void **) (void *) &keyword
);
1919 if (get_string (&t
, &temp
))
1930 /* skip the '-' character */
1934 if (get_string (&t
, &temp2
))
1936 mem_free ((void **) (void *) &temp
);
1942 s
->m_spec_type
= st_byte_range
;
1943 s
->m_byte
[0] = *temp
;
1944 s
->m_byte
[1] = *temp2
;
1946 mem_free ((void **) (void *) &temp2
);
1950 s
->m_spec_type
= st_byte
;
1954 mem_free ((void **) (void *) &temp
);
1958 if (get_string (&t
, &s
->m_string
))
1965 s
->m_spec_type
= st_string
;
1969 byte
*keyword
= NULL
;
1974 if (get_identifier (&t
, &keyword
))
1982 if (str_equal ((byte
*) "true", keyword
))
1984 s
->m_spec_type
= st_true
;
1987 else if (str_equal ((byte
*) "false", keyword
))
1989 s
->m_spec_type
= st_false
;
1992 else if (str_equal ((byte
*) "debug", keyword
))
1994 s
->m_spec_type
= st_debug
;
1997 else if (str_equal ((byte
*) "loop", keyword
))
1999 if (get_identifier (&t
, &s
->m_string
))
2001 mem_free ((void **) (void *) &keyword
);
2007 s
->m_spec_type
= st_identifier_loop
;
2009 mem_free ((void **) (void *) &keyword
);
2013 if (get_identifier (&t
, &s
->m_string
))
2020 s
->m_spec_type
= st_identifier
;
2023 if (get_error (&t
, &s
->m_errtext
, maps
))
2029 if (get_emits (&t
, &s
->m_emits
, mapb
))
2041 returns 0 on success,
2042 returns 1 otherwise,
2044 static int get_rule (const byte
**text
, rule
**ru
, map_str
*maps
, map_byte
*mapb
)
2046 const byte
*t
= *text
;
2053 if (get_spec (&t
, &r
->m_specs
, maps
, mapb
))
2064 /* skip the dot that precedes "and" or "or" */
2067 /* read "and" or "or" keyword */
2068 if (get_identifier (&t
, &op
))
2075 if (r
->m_oper
== op_none
)
2078 if (str_equal ((byte
*) "and", op
))
2085 mem_free ((void **) (void *) &op
);
2087 if (get_spec (&t
, &sp
, maps
, mapb
))
2093 spec_append (&r
->m_specs
, sp
);
2096 /* skip the semicolon */
2106 returns 0 on success,
2107 returns 1 otherwise,
2109 static int update_dependency (map_rule
*mapr
, byte
*symbol
, rule
**ru
)
2111 if (map_rule_find (&mapr
, symbol
, ru
))
2114 (**ru
).m_referenced
= 1;
2120 returns 0 on success,
2121 returns 1 otherwise,
2123 static int update_dependencies (dict
*di
, map_rule
*mapr
, byte
**syntax_symbol
,
2124 byte
**string_symbol
, map_byte
*regbytes
)
2126 rule
*rulez
= di
->m_rulez
;
2128 /* update dependencies for the root and lexer symbols */
2129 if (update_dependency (mapr
, *syntax_symbol
, &di
->m_syntax
) ||
2130 (*string_symbol
!= NULL
&& update_dependency (mapr
, *string_symbol
, &di
->m_string
)))
2133 mem_free ((void **) syntax_symbol
);
2134 mem_free ((void **) string_symbol
);
2136 /* update dependencies for the rest of the rules */
2139 spec
*sp
= rulez
->m_specs
;
2141 /* iterate through all the specifiers */
2144 /* update dependency for identifier */
2145 if (sp
->m_spec_type
== st_identifier
|| sp
->m_spec_type
== st_identifier_loop
)
2147 if (update_dependency (mapr
, sp
->m_string
, &sp
->m_rule
))
2150 mem_free ((void **) &sp
->m_string
);
2153 /* some errtexts refer to a rule */
2154 if (sp
->m_errtext
&& sp
->m_errtext
->m_token_name
)
2156 if (update_dependency (mapr
, sp
->m_errtext
->m_token_name
, &sp
->m_errtext
->m_token
))
2159 mem_free ((void **) &sp
->m_errtext
->m_token_name
);
2162 /* update dependency for condition */
2166 for (i
= 0; i
< 2; i
++)
2167 if (sp
->m_cond
->m_operands
[i
].m_type
== cot_regbyte
)
2169 sp
->m_cond
->m_operands
[i
].m_regbyte
= map_byte_locate (&regbytes
,
2170 sp
->m_cond
->m_operands
[i
].m_regname
);
2172 if (sp
->m_cond
->m_operands
[i
].m_regbyte
== NULL
)
2175 mem_free ((void **) &sp
->m_cond
->m_operands
[i
].m_regname
);
2179 /* update dependency for all .load instructions */
2182 emit
*em
= sp
->m_emits
;
2185 if (em
->m_emit_dest
== ed_regbyte
)
2187 em
->m_regbyte
= map_byte_locate (&regbytes
, em
->m_regname
);
2189 if (em
->m_regbyte
== NULL
)
2192 mem_free ((void **) &em
->m_regname
);
2202 rulez
= rulez
->next
;
2205 /* check for unreferenced symbols */
2206 rulez
= di
->m_rulez
;
2207 while (rulez
!= NULL
)
2209 if (!rulez
->m_referenced
)
2211 map_rule
*ma
= mapr
;
2214 if (ma
->data
== rulez
)
2216 set_last_error (UNREFERENCED_IDENTIFIER
, str_duplicate (ma
->key
), -1);
2222 rulez
= rulez
->next
;
2228 static int satisfies_condition (cond
*co
, regbyte_ctx
*ctx
)
2236 for (i
= 0; i
< 2; i
++)
2237 switch (co
->m_operands
[i
].m_type
)
2240 values
[i
] = co
->m_operands
[i
].m_byte
;
2243 values
[i
] = regbyte_ctx_extract (&ctx
, co
->m_operands
[i
].m_regbyte
);
2250 return values
[0] == values
[1];
2252 return values
[0] != values
[1];
2258 static void free_regbyte_ctx_stack (regbyte_ctx
*top
, regbyte_ctx
*limit
)
2260 while (top
!= limit
)
2262 regbyte_ctx
*rbc
= top
->m_prev
;
2263 regbyte_ctx_destroy (&top
);
2268 typedef enum match_result_
2270 mr_not_matched
, /* the examined string does not match */
2271 mr_matched
, /* the examined string matches */
2272 mr_error_raised
, /* mr_not_matched + error has been raised */
2273 mr_dont_emit
, /* used by identifier loops only */
2274 mr_internal_error
/* an internal error has occurred such as out of memory */
2278 This function does the main job. It parses the text and generates output data.
2280 static match_result
match (dict
*di
, const byte
*text
, unsigned int *index
, rule
*ru
, barray
**ba
,
2281 int filtering_string
, regbyte_ctx
**rbc
)
2283 unsigned int ind
= *index
;
2284 match_result status
= mr_not_matched
;
2285 spec
*sp
= ru
->m_specs
;
2286 regbyte_ctx
*ctx
= *rbc
;
2288 /* for every specifier in the rule */
2291 unsigned int i
, len
, save_ind
= ind
;
2292 barray
*array
= NULL
;
2294 if (satisfies_condition (sp
->m_cond
, ctx
))
2296 switch (sp
->m_spec_type
)
2299 barray_create (&array
);
2302 free_regbyte_ctx_stack (ctx
, *rbc
);
2303 return mr_internal_error
;
2306 status
= match (di
, text
, &ind
, sp
->m_rule
, &array
, filtering_string
, &ctx
);
2308 if (status
== mr_internal_error
)
2310 free_regbyte_ctx_stack (ctx
, *rbc
);
2311 barray_destroy (&array
);
2312 return mr_internal_error
;
2316 len
= str_length (sp
->m_string
);
2318 /* prefilter the stream */
2319 if (!filtering_string
&& di
->m_string
)
2322 unsigned int filter_index
= 0;
2323 match_result result
;
2324 regbyte_ctx
*null_ctx
= NULL
;
2326 barray_create (&ba
);
2329 free_regbyte_ctx_stack (ctx
, *rbc
);
2330 return mr_internal_error
;
2333 result
= match (di
, text
+ ind
, &filter_index
, di
->m_string
, &ba
, 1, &null_ctx
);
2335 if (result
== mr_internal_error
)
2337 free_regbyte_ctx_stack (ctx
, *rbc
);
2338 barray_destroy (&ba
);
2339 return mr_internal_error
;
2342 if (result
!= mr_matched
)
2344 barray_destroy (&ba
);
2345 status
= mr_not_matched
;
2349 barray_destroy (&ba
);
2351 if (filter_index
!= len
|| !str_equal_n (sp
->m_string
, text
+ ind
, len
))
2353 status
= mr_not_matched
;
2357 status
= mr_matched
;
2362 status
= mr_matched
;
2363 for (i
= 0; status
== mr_matched
&& i
< len
; i
++)
2364 if (text
[ind
+ i
] != sp
->m_string
[i
])
2365 status
= mr_not_matched
;
2367 if (status
== mr_matched
)
2372 status
= text
[ind
] == *sp
->m_byte
? mr_matched
: mr_not_matched
;
2373 if (status
== mr_matched
)
2377 status
= (text
[ind
] >= sp
->m_byte
[0] && text
[ind
] <= sp
->m_byte
[1]) ?
2378 mr_matched
: mr_not_matched
;
2379 if (status
== mr_matched
)
2383 status
= mr_matched
;
2386 status
= mr_not_matched
;
2389 status
= ru
->m_oper
== op_and
? mr_matched
: mr_not_matched
;
2391 case st_identifier_loop
:
2392 barray_create (&array
);
2395 free_regbyte_ctx_stack (ctx
, *rbc
);
2396 return mr_internal_error
;
2399 status
= mr_dont_emit
;
2402 match_result result
;
2405 result
= match (di
, text
, &ind
, sp
->m_rule
, &array
, filtering_string
, &ctx
);
2407 if (result
== mr_error_raised
)
2412 else if (result
== mr_matched
)
2414 if (barray_push (ba
, sp
->m_emits
, text
[ind
- 1], save_ind
, &ctx
) ||
2415 barray_append (ba
, &array
))
2417 free_regbyte_ctx_stack (ctx
, *rbc
);
2418 barray_destroy (&array
);
2419 return mr_internal_error
;
2421 barray_destroy (&array
);
2422 barray_create (&array
);
2425 free_regbyte_ctx_stack (ctx
, *rbc
);
2426 return mr_internal_error
;
2429 else if (result
== mr_internal_error
)
2431 free_regbyte_ctx_stack (ctx
, *rbc
);
2432 barray_destroy (&array
);
2433 return mr_internal_error
;
2443 status
= mr_not_matched
;
2446 if (status
== mr_error_raised
)
2448 free_regbyte_ctx_stack (ctx
, *rbc
);
2449 barray_destroy (&array
);
2451 return mr_error_raised
;
2454 if (ru
->m_oper
== op_and
&& status
!= mr_matched
&& status
!= mr_dont_emit
)
2456 free_regbyte_ctx_stack (ctx
, *rbc
);
2457 barray_destroy (&array
);
2461 set_last_error (sp
->m_errtext
->m_text
, error_get_token (sp
->m_errtext
, di
, text
,
2464 return mr_error_raised
;
2467 return mr_not_matched
;
2470 if (status
== mr_matched
)
2473 if (barray_push (ba
, sp
->m_emits
, text
[ind
- 1], save_ind
, &ctx
))
2475 free_regbyte_ctx_stack (ctx
, *rbc
);
2476 barray_destroy (&array
);
2477 return mr_internal_error
;
2481 if (barray_append (ba
, &array
))
2483 free_regbyte_ctx_stack (ctx
, *rbc
);
2484 barray_destroy (&array
);
2485 return mr_internal_error
;
2489 barray_destroy (&array
);
2491 /* if the rule operator is a logical or, we pick up the first matching specifier */
2492 if (ru
->m_oper
== op_or
&& (status
== mr_matched
|| status
== mr_dont_emit
))
2502 /* everything went fine - all specifiers match up */
2503 if (ru
->m_oper
== op_and
&& (status
== mr_matched
|| status
== mr_dont_emit
))
2510 free_regbyte_ctx_stack (ctx
, *rbc
);
2511 return mr_not_matched
;
2514 static match_result
fast_match (dict
*di
, const byte
*text
, unsigned int *index
, rule
*ru
, int *_PP
, bytepool
*_BP
,
2515 int filtering_string
, regbyte_ctx
**rbc
)
2517 unsigned int ind
= *index
;
2518 int _P
= filtering_string
? 0 : *_PP
;
2520 match_result status
= mr_not_matched
;
2521 spec
*sp
= ru
->m_specs
;
2522 regbyte_ctx
*ctx
= *rbc
;
2524 /* for every specifier in the rule */
2527 unsigned int i
, len
, save_ind
= ind
;
2529 _P2
= _P
+ (sp
->m_emits
? emit_size (sp
->m_emits
) : 0);
2530 if (bytepool_reserve (_BP
, _P2
))
2532 free_regbyte_ctx_stack (ctx
, *rbc
);
2533 return mr_internal_error
;
2536 if (satisfies_condition (sp
->m_cond
, ctx
))
2538 switch (sp
->m_spec_type
)
2541 status
= fast_match (di
, text
, &ind
, sp
->m_rule
, &_P2
, _BP
, filtering_string
, &ctx
);
2543 if (status
== mr_internal_error
)
2545 free_regbyte_ctx_stack (ctx
, *rbc
);
2546 return mr_internal_error
;
2550 len
= str_length (sp
->m_string
);
2552 /* prefilter the stream */
2553 if (!filtering_string
&& di
->m_string
)
2555 unsigned int filter_index
= 0;
2556 match_result result
;
2557 regbyte_ctx
*null_ctx
= NULL
;
2559 result
= fast_match (di
, text
+ ind
, &filter_index
, di
->m_string
, NULL
, _BP
, 1, &null_ctx
);
2561 if (result
== mr_internal_error
)
2563 free_regbyte_ctx_stack (ctx
, *rbc
);
2564 return mr_internal_error
;
2567 if (result
!= mr_matched
)
2569 status
= mr_not_matched
;
2573 if (filter_index
!= len
|| !str_equal_n (sp
->m_string
, text
+ ind
, len
))
2575 status
= mr_not_matched
;
2579 status
= mr_matched
;
2584 status
= mr_matched
;
2585 for (i
= 0; status
== mr_matched
&& i
< len
; i
++)
2586 if (text
[ind
+ i
] != sp
->m_string
[i
])
2587 status
= mr_not_matched
;
2589 if (status
== mr_matched
)
2594 status
= text
[ind
] == *sp
->m_byte
? mr_matched
: mr_not_matched
;
2595 if (status
== mr_matched
)
2599 status
= (text
[ind
] >= sp
->m_byte
[0] && text
[ind
] <= sp
->m_byte
[1]) ?
2600 mr_matched
: mr_not_matched
;
2601 if (status
== mr_matched
)
2605 status
= mr_matched
;
2608 status
= mr_not_matched
;
2611 status
= ru
->m_oper
== op_and
? mr_matched
: mr_not_matched
;
2613 case st_identifier_loop
:
2614 status
= mr_dont_emit
;
2617 match_result result
;
2620 result
= fast_match (di
, text
, &ind
, sp
->m_rule
, &_P2
, _BP
, filtering_string
, &ctx
);
2622 if (result
== mr_error_raised
)
2627 else if (result
== mr_matched
)
2629 if (!filtering_string
)
2631 if (sp
->m_emits
!= NULL
)
2633 if (emit_push (sp
->m_emits
, _BP
->_F
+ _P
, text
[ind
- 1], save_ind
, &ctx
))
2635 free_regbyte_ctx_stack (ctx
, *rbc
);
2636 return mr_internal_error
;
2641 _P2
+= sp
->m_emits
? emit_size (sp
->m_emits
) : 0;
2642 if (bytepool_reserve (_BP
, _P2
))
2644 free_regbyte_ctx_stack (ctx
, *rbc
);
2645 return mr_internal_error
;
2649 else if (result
== mr_internal_error
)
2651 free_regbyte_ctx_stack (ctx
, *rbc
);
2652 return mr_internal_error
;
2662 status
= mr_not_matched
;
2665 if (status
== mr_error_raised
)
2667 free_regbyte_ctx_stack (ctx
, *rbc
);
2669 return mr_error_raised
;
2672 if (ru
->m_oper
== op_and
&& status
!= mr_matched
&& status
!= mr_dont_emit
)
2674 free_regbyte_ctx_stack (ctx
, *rbc
);
2678 set_last_error (sp
->m_errtext
->m_text
, error_get_token (sp
->m_errtext
, di
, text
,
2681 return mr_error_raised
;
2684 return mr_not_matched
;
2687 if (status
== mr_matched
)
2689 if (sp
->m_emits
!= NULL
)
2690 if (emit_push (sp
->m_emits
, _BP
->_F
+ _P
, text
[ind
- 1], save_ind
, &ctx
))
2692 free_regbyte_ctx_stack (ctx
, *rbc
);
2693 return mr_internal_error
;
2699 /* if the rule operator is a logical or, we pick up the first matching specifier */
2700 if (ru
->m_oper
== op_or
&& (status
== mr_matched
|| status
== mr_dont_emit
))
2704 if (!filtering_string
)
2712 /* everything went fine - all specifiers match up */
2713 if (ru
->m_oper
== op_and
&& (status
== mr_matched
|| status
== mr_dont_emit
))
2717 if (!filtering_string
)
2722 free_regbyte_ctx_stack (ctx
, *rbc
);
2723 return mr_not_matched
;
2726 static byte
*error_get_token (error
*er
, dict
*di
, const byte
*text
, unsigned int ind
)
2733 unsigned int filter_index
= 0;
2734 regbyte_ctx
*ctx
= NULL
;
2736 barray_create (&ba
);
2739 if (match (di
, text
+ ind
, &filter_index
, er
->m_token
, &ba
, 0, &ctx
) == mr_matched
&&
2742 str
= (byte
*) mem_alloc (filter_index
+ 1);
2745 str_copy_n (str
, text
+ ind
, filter_index
);
2746 str
[filter_index
] = '\0';
2749 barray_destroy (&ba
);
2756 typedef struct grammar_load_state_
2759 byte
*syntax_symbol
;
2760 byte
*string_symbol
;
2764 } grammar_load_state
;
2766 static void grammar_load_state_create (grammar_load_state
**gr
)
2768 *gr
= (grammar_load_state
*) mem_alloc (sizeof (grammar_load_state
));
2772 (**gr
).syntax_symbol
= NULL
;
2773 (**gr
).string_symbol
= NULL
;
2780 static void grammar_load_state_destroy (grammar_load_state
**gr
)
2784 dict_destroy (&(**gr
).di
);
2785 mem_free ((void **) &(**gr
).syntax_symbol
);
2786 mem_free ((void **) &(**gr
).string_symbol
);
2787 map_str_destroy (&(**gr
).maps
);
2788 map_byte_destroy (&(**gr
).mapb
);
2789 map_rule_destroy (&(**gr
).mapr
);
2790 mem_free ((void **) gr
);
2798 grammar
grammar_load_from_text (const byte
*text
)
2800 grammar_load_state
*g
= NULL
;
2803 clear_last_error ();
2805 grammar_load_state_create (&g
);
2809 dict_create (&g
->di
);
2812 grammar_load_state_destroy (&g
);
2818 /* skip ".syntax" keyword */
2822 /* retrieve root symbol */
2823 if (get_identifier (&text
, &g
->syntax_symbol
))
2825 grammar_load_state_destroy (&g
);
2830 /* skip semicolon */
2836 byte
*symbol
= NULL
;
2837 int is_dot
= *text
== '.';
2842 if (get_identifier (&text
, &symbol
))
2844 grammar_load_state_destroy (&g
);
2850 if (is_dot
&& str_equal (symbol
, (byte
*) "emtcode"))
2852 map_byte
*ma
= NULL
;
2854 mem_free ((void **) (void *) &symbol
);
2856 if (get_emtcode (&text
, &ma
))
2858 grammar_load_state_destroy (&g
);
2862 map_byte_append (&g
->mapb
, ma
);
2865 else if (is_dot
&& str_equal (symbol
, (byte
*) "regbyte"))
2867 map_byte
*ma
= NULL
;
2869 mem_free ((void **) (void *) &symbol
);
2871 if (get_regbyte (&text
, &ma
))
2873 grammar_load_state_destroy (&g
);
2877 map_byte_append (&g
->di
->m_regbytes
, ma
);
2880 else if (is_dot
&& str_equal (symbol
, (byte
*) "errtext"))
2884 mem_free ((void **) (void *) &symbol
);
2886 if (get_errtext (&text
, &ma
))
2888 grammar_load_state_destroy (&g
);
2892 map_str_append (&g
->maps
, ma
);
2895 else if (is_dot
&& str_equal (symbol
, (byte
*) "string"))
2897 mem_free ((void **) (void *) &symbol
);
2899 if (g
->di
->m_string
!= NULL
)
2901 grammar_load_state_destroy (&g
);
2905 if (get_identifier (&text
, &g
->string_symbol
))
2907 grammar_load_state_destroy (&g
);
2911 /* skip semicolon */
2919 map_rule
*ma
= NULL
;
2921 if (get_rule (&text
, &ru
, g
->maps
, g
->mapb
))
2923 grammar_load_state_destroy (&g
);
2927 rule_append (&g
->di
->m_rulez
, ru
);
2929 /* if a rule consists of only one specifier, give it an ".and" operator */
2930 if (ru
->m_oper
== op_none
)
2931 ru
->m_oper
= op_and
;
2933 map_rule_create (&ma
);
2936 grammar_load_state_destroy (&g
);
2942 map_rule_append (&g
->mapr
, ma
);
2946 if (update_dependencies (g
->di
, g
->mapr
, &g
->syntax_symbol
, &g
->string_symbol
,
2949 grammar_load_state_destroy (&g
);
2953 dict_append (&g_dicts
, g
->di
);
2957 grammar_load_state_destroy (&g
);
2962 int grammar_set_reg8 (grammar id
, const byte
*name
, byte value
)
2965 map_byte
*reg
= NULL
;
2967 clear_last_error ();
2969 dict_find (&g_dicts
, id
, &di
);
2972 set_last_error (INVALID_GRAMMAR_ID
, NULL
, -1);
2976 reg
= map_byte_locate (&di
->m_regbytes
, name
);
2979 set_last_error (INVALID_REGISTER_NAME
, str_duplicate (name
), -1);
2988 internal checking function used by both grammar_check and grammar_fast_check functions
2990 static int _grammar_check (grammar id
, const byte
*text
, byte
**prod
, unsigned int *size
,
2991 unsigned int estimate_prod_size
, int use_fast_path
)
2994 unsigned int index
= 0;
2996 clear_last_error ();
2998 dict_find (&g_dicts
, id
, &di
);
3001 set_last_error (INVALID_GRAMMAR_ID
, NULL
, -1);
3010 regbyte_ctx
*rbc
= NULL
;
3011 bytepool
*bp
= NULL
;
3014 bytepool_create (&bp
, estimate_prod_size
);
3018 if (fast_match (di
, text
, &index
, di
->m_syntax
, &_P
, bp
, 0, &rbc
) != mr_matched
)
3020 bytepool_destroy (&bp
);
3021 free_regbyte_ctx_stack (rbc
, NULL
);
3025 free_regbyte_ctx_stack (rbc
, NULL
);
3030 bytepool_destroy (&bp
);
3034 regbyte_ctx
*rbc
= NULL
;
3037 barray_create (&ba
);
3041 if (match (di
, text
, &index
, di
->m_syntax
, &ba
, 0, &rbc
) != mr_matched
)
3043 barray_destroy (&ba
);
3044 free_regbyte_ctx_stack (rbc
, NULL
);
3048 free_regbyte_ctx_stack (rbc
, NULL
);
3050 *prod
= (byte
*) mem_alloc (ba
->len
* sizeof (byte
));
3053 barray_destroy (&ba
);
3057 mem_copy (*prod
, ba
->data
, ba
->len
* sizeof (byte
));
3059 barray_destroy (&ba
);
3065 int grammar_check (grammar id
, const byte
*text
, byte
**prod
, unsigned int *size
)
3067 return _grammar_check (id
, text
, prod
, size
, 0, 0);
3070 int grammar_fast_check (grammar id
, const byte
*text
, byte
**prod
, unsigned int *size
,
3071 unsigned int estimate_prod_size
)
3073 return _grammar_check (id
, text
, prod
, size
, estimate_prod_size
, 1);
3076 int grammar_destroy (grammar id
)
3078 dict
**di
= &g_dicts
;
3080 clear_last_error ();
3084 if ((**di
).m_id
== id
)
3088 dict_destroy (&tmp
);
3095 set_last_error (INVALID_GRAMMAR_ID
, NULL
, -1);
3099 static void append_character (const char x
, byte
*text
, int *dots_made
, int *len
, int size
)
3101 if (*dots_made
== 0)
3103 if (*len
< size
- 1)
3111 for (i
= 0; i
< 3; i
++)
3119 void grammar_get_last_error (byte
*text
, unsigned int size
, int *pos
)
3121 int len
= 0, dots_made
= 0;
3122 const byte
*p
= error_message
;
3132 const byte
*r
= error_param
;
3136 append_character (*r
++, text
, &dots_made
, &len
, (int) size
);
3143 append_character (*p
++, text
, &dots_made
, &len
, size
);
3148 *pos
= error_position
;