2 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * syntax parsing engine
31 #ifndef GRAMMAR_PORT_BUILD
32 #error Do not build this file directly, build your grammar_XXX.c instead, which includes this file
42 The task is to check the syntax of an input string. Input string is a stream of ASCII
43 characters terminated with a null-character ('\0'). Checking it using C language is
44 difficult and hard to implement without bugs. It is hard to maintain and make changes when
47 This is because of a high redundancy of the C code. Large blocks of code are duplicated with
48 only small changes. Even use of macros does not solve the problem because macros cannot
49 erase the complexity of the problem.
51 The resolution is to create a new language that will be highly oriented to our task. Once
52 we describe a particular syntax, we are done. We can then focus on the code that implements
53 the language. The size and complexity of it is relatively small compared to the code that directly
56 First, we must implement our new language. Here, the language is implemented in C, but it
57 could also be implemented in any other language. The code is listed below. We must take
58 good care that it is bug free. This is simple because the code is simple and clean.
60 Next, we must describe the syntax of our new language in itself. Once created and checked
61 manually that it is correct, we can use it to check other scripts.
63 Note that our new language loading code does not have to check the syntax. It is because we
64 assume that the script describing itself is correct, and other scripts can be syntactically
65 checked by the former script. The loading code must only do semantic checking which leads us to
66 simple resolving references.
71 Here I will describe the syntax of the new language (further called "Synek"). It is mainly a
72 sequence of declarations terminated by a semicolon. The declaration consists of a symbol,
73 which is an identifier, and its definition. A definition is in turn a sequence of specifiers
74 connected with ".and" or ".or" operator. These operators cannot be mixed together in one
75 definition. Specifier can be a symbol, string, character, character range or a special
76 keyword ".true" or ".false".
78 At the very beginning of the script there is a declaration of a root symbol, which is in the form:
79 .syntax <root_symbol>;
80 The <root_symbol> must be one of the symbols in declaration sequence. The syntax is correct if
81 the root symbol evaluates to true. A symbol evaluates to true if the definition associated with
82 the symbol evaluates to true. Definition evaluation depends on the operator used to connect
83 specifiers in the definition. If ".and" operator is used, definition evaluates to true if and
84 only if all the specifiers evaluate to true. If ".or" operator is used, definition evaluates to
85 true if any of the specifiers evaluates to true. If definition contains only one specifier,
86 it is evaluated as if it was connected with ".true" keyword by ".and" operator.
88 If specifier is a ".true" keyword, it always evaluates to true.
90 If specifier is a ".false" keyword, it always evaluates to false. Specifier evaluates to false
91 when it does not evaluate to true.
93 Character range specifier is in the form:
94 '<first_character>' - '<second_character>'
95 If specifier is a character range, it evaluates to true if character in the stream is greater
96 or equal to <first_character> and less or equal to <second_character>. In that situation
97 the stream pointer is advanced to point to next character in the stream. All C-style escape
98 sequences are supported although trigraph sequences are not. The comparisons are performed
99 on 8-bit unsigned integers.
101 Character specifier is in the form:
103 It evaluates to true if the following character range specifier evaluates to true:
104 '<single_character>' - '<single_character>'
106 String specifier is in the form:
108 Let N be the number of characters in <string>. Let <string>[i] designate i-th character in
109 <string>. Then the string specifier evaluates to true if and only if for i in the range [0, N)
110 the following character specifier evaluates to true:
112 If <string>[i] is a quotation mark, '<string>[i]' is replaced with '\<string>[i]'.
114 Symbol specifier can be optionally preceded by a ".loop" keyword in the form:
116 where <symbol> is defined as follows:
117 <symbol> <definition>; (2)
118 Construction (1) is replaced by the following code:
120 and declaration (2) is replaced by the following:
121 <symbol$1> <symbol$2> .or .true;
122 <symbol$2> <symbol> .and <symbol$1>;
123 <symbol> <definition>;
125 Synek also supports a register mechanism. The user can, in their SYN file, declare a number of
126 registers that can be accessed in the syn body. Each reg has its name and a default value.
127 The register is one byte wide. The C code can change the default value by calling
128 grammar_set_reg8() with grammar id, register name and a new value. As we know, each rule is
129 a sequence of specifiers joined with .and or .or operator. And now each specifier can be
130 prefixed with a condition expression in a form ".if (<reg_name> <operator> <hex_literal>)"
131 where <operator> can be == or !=. If the condition evaluates to false, the specifier
132 evaluates to .false. Otherwise it evaluates to the specifier.
137 Synek supports all escape sequences in character specifiers. The mapping table is listed below.
138 All occurrences of the characters in the first column are replaced with the corresponding
139 character in the second column.
141 Escape sequence Represents
142 ------------------------------------------------------------------------------------------------
150 \' Single quotation mark
151 \" Double quotation mark
153 \? Literal question mark
154 \ooo ASCII character in octal notation
155 \xhhh ASCII character in hexadecimal notation
156 ------------------------------------------------------------------------------------------------
161 Any specifier can be followed by a special construction that is executed when the specifier
162 evaluates to false. The construction is in the form:
164 <ERROR_TEXT> is an identifier declared earlier by error text declaration. The declaration is
166 .errtext <ERROR_TEXT> "<error_desc>"
167 When specifier evaluates to false and this construction is present, parsing is stopped
168 immediately and <error_desc> is returned as a result of parsing. The error position is also
169 returned and it is meant as an offset from the beginning of the stream to the character that
170 was valid so far. Example:
172 (**** syntax script ****)
175 .errtext MISSING_SEMICOLON "missing ';'"
176 program declaration .and .loop space .and ';' .error MISSING_SEMICOLON .and
177 .loop space .and '\0';
178 declaration "declare" .and .loop space .and identifier;
181 (**** sample code ****)
185 In the example above checking the sample code will result in error message "missing ';'" and
186 error position 12. The sample code is not correct. Note the presence of '\0' specifier to
187 assure that there is no code after semicolon - only spaces.
188 <error_desc> can optionally contain identifier surrounded by dollar signs $. In such a case,
189 the identifier and dollar signs are replaced by a string retrieved by invoking symbol with
190 the identifier name. The starting position is the error position. The length of the resulting
191 string is the position after invoking the symbol.
196 Synek not only checks the syntax but it can also produce (emit) bytes associated with specifiers
197 that evaluate to true. That is, every specifier and optional error construction can be followed
198 by a number of emit constructions that are in the form:
200 <parameter> can be a HEX number, identifier, a star * or a dollar $. HEX number is preceded by
201 0x or 0X. If <parameter> is an identifier, it must be earlier declared by emit code declaration
203 .emtcode <identifier> <hex_number>
205 When given specifier evaluates to true, all emits associated with the specifier are output
206 in order they were declared. A star means that last-read character should be output instead
207 of constant value. Example:
209 (**** syntax script ****)
212 .emtcode WORD_FOO 0x01
213 .emtcode WORD_BAR 0x02
214 foobar FOO .emit WORD_FOO .or BAR .emit WORD_BAR .or .true .emit 0x00;
215 FOO "foo" .and SPACE;
216 BAR "bar" .and SPACE;
219 (**** sample text 1 ****)
223 (**** sample text 2 ****)
227 For both samples the result will be one-element array. For first sample text it will be
228 value 1, for second - 0. Note that every text will be accepted because of presence of
229 .true as an alternative.
233 (**** syntax script ****)
236 .emtcode VARIABLE 0x01
237 declaration "declare" .and .loop space .and
238 identifier .emit VARIABLE .and (1)
239 .true .emit 0x00 .and (2)
240 .loop space .and ';';
242 identifier .loop id_char .emit *; (3)
243 id_char 'a'-'z' .or 'A'-'Z' .or '_';
245 (**** sample code ****)
249 In specifier (1) symbol <identifier> is followed by .emit VARIABLE. If it evaluates to
250 true, VARIABLE constant and then production of the symbol is output. Specifier (2) is used
251 to terminate the string with null to signal when the string ends. Specifier (3) outputs
252 all characters that make declared identifier. The result of sample code will be the
254 { 1, 'f', 'u', 'b', 'a', 'r', 0 }
256 If .emit is followed by dollar $, it means that current position should be output. Current
257 position is a 32-bit unsigned integer distance from the very beginning of the parsed string to
258 first character consumed by the specifier associated with the .emit instruction. Current
259 position is stored in the output buffer in Little-Endian convention (the lowest byte comes
263 static void mem_free (void **);
266 internal error messages
268 static const byte
*OUT_OF_MEMORY
= (byte
*) "internal error 1001: out of physical memory";
269 static const byte
*UNRESOLVED_REFERENCE
= (byte
*) "internal error 1002: unresolved reference '$'";
270 static const byte
*INVALID_GRAMMAR_ID
= (byte
*) "internal error 1003: invalid grammar object";
271 static const byte
*INVALID_REGISTER_NAME
= (byte
*) "internal error 1004: invalid register name: '$'";
272 static const byte
*DUPLICATE_IDENTIFIER
= (byte
*) "internal error 1005: identifier '$' already defined";
273 static const byte
*UNREFERENCED_IDENTIFIER
=(byte
*) "internal error 1006: unreferenced identifier '$'";
275 static const byte
*error_message
= NULL
; /* points to one of the error messages above */
276 static byte
*error_param
= NULL
; /* this is inserted into error_message in place of $ */
277 static int error_position
= -1;
279 static byte
*unknown
= (byte
*) "???";
281 static void clear_last_error (void)
283 /* reset error message */
284 error_message
= NULL
;
286 /* free error parameter - if error_param is a "???" don't free it - it's static */
287 if (error_param
!= unknown
)
288 mem_free ((void **) (void *) &error_param
);
292 /* reset error position */
296 static void set_last_error (const byte
*msg
, byte
*param
, int pos
)
298 /* error message can be set only once */
299 if (error_message
!= NULL
)
301 mem_free ((void **) (void *) ¶m
);
307 /* if param is NULL, set error_param to unknown ("???") */
308 /* note: do not try to strdup the "???" - it may be that we are here because of */
309 /* out of memory error so strdup can fail */
313 error_param
= unknown
;
315 error_position
= pos
;
319 memory management routines
321 static void *mem_alloc (size_t size
)
323 void *ptr
= grammar_alloc_malloc (size
);
325 set_last_error (OUT_OF_MEMORY
, NULL
, -1);
/*
    Copies size bytes from src to dst by delegating to the port-supplied
    grammar_memory_copy() and passes its result straight back to the caller.
*/
static void *mem_copy (void *dst, const void *src, size_t size)
{
    void *result = grammar_memory_copy (dst, src, size);

    return result;
}
334 static void mem_free (void **ptr
)
336 grammar_alloc_free (*ptr
);
340 static void *mem_realloc (void *ptr
, size_t old_size
, size_t new_size
)
342 void *ptr2
= grammar_alloc_realloc (ptr
, old_size
, new_size
);
344 set_last_error (OUT_OF_MEMORY
, NULL
, -1);
348 static byte
*str_copy_n (byte
*dst
, const byte
*src
, size_t max_len
)
350 return grammar_string_copy_n (dst
, src
, max_len
);
353 static byte
*str_duplicate (const byte
*str
)
355 byte
*new_str
= grammar_string_duplicate (str
);
357 set_last_error (OUT_OF_MEMORY
, NULL
, -1);
361 static int str_equal (const byte
*str1
, const byte
*str2
)
363 return grammar_string_compare (str1
, str2
) == 0;
366 static int str_equal_n (const byte
*str1
, const byte
*str2
, unsigned int n
)
368 return grammar_string_compare_n (str1
, str2
, n
) == 0;
371 static unsigned int str_length (const byte
*str
)
373 return grammar_string_length (str
);
379 #define GRAMMAR_IMPLEMENT_LIST_APPEND(_Ty)\
380 static void _Ty##_append (_Ty **x, _Ty *nx) {\
381 while (*x) x = &(**x).next;\
386 string to byte map typedef
388 typedef struct map_byte_
392 struct map_byte_
*next
;
395 static void map_byte_create (map_byte
**ma
)
397 *ma
= (map_byte
*) mem_alloc (sizeof (map_byte
));
406 static void map_byte_destroy (map_byte
**ma
)
410 map_byte_destroy (&(**ma
).next
);
411 mem_free ((void **) &(**ma
).key
);
412 mem_free ((void **) ma
);
416 GRAMMAR_IMPLEMENT_LIST_APPEND(map_byte
)
419 searches the map for the specified key,
420 returns pointer to the element with the specified key if it exists
421 returns NULL otherwise
423 static map_byte
*map_byte_locate (map_byte
**ma
, const byte
*key
)
427 if (str_equal ((**ma
).key
, key
))
433 set_last_error (UNRESOLVED_REFERENCE
, str_duplicate (key
), -1);
438 searches the map for specified key,
439 if the key is matched, *data is filled with data associated with the key,
440 returns 0 if the key is matched,
443 static int map_byte_find (map_byte
**ma
, const byte
*key
, byte
*data
)
445 map_byte
*found
= map_byte_locate (ma
, key
);
457 regbyte context typedef
459 Each regbyte consists of its name and a default value. These are static and created at
460 grammar script compile-time, for example the following line:
461 .regbyte vertex_blend 0x00
462 adds a new regbyte named "vertex_blend" to the static list and initializes it to 0.
463 When the script is executed, this regbyte can be accessed by name for read and write. When a
464 particular regbyte is written, a new regbyte_ctx entry is added to the top of the regbyte_ctx
465 stack. The new entry contains information about which regbyte it references and its new value.
466 When a given regbyte is accessed for read, the stack is searched top-down to find an
467 entry that references the regbyte. The first matching entry is used to return the current
468 value it holds. If no entry is found, the default value is returned.
470 typedef struct regbyte_ctx_
473 byte m_current_value
;
474 struct regbyte_ctx_
*m_prev
;
477 static void regbyte_ctx_create (regbyte_ctx
**re
)
479 *re
= (regbyte_ctx
*) mem_alloc (sizeof (regbyte_ctx
));
482 (**re
).m_regbyte
= NULL
;
483 (**re
).m_prev
= NULL
;
487 static void regbyte_ctx_destroy (regbyte_ctx
**re
)
491 mem_free ((void **) re
);
495 static byte
regbyte_ctx_extract (regbyte_ctx
**re
, map_byte
*reg
)
497 /* first lookup in the register stack */
500 if ((**re
).m_regbyte
== reg
)
501 return (**re
).m_current_value
;
506 /* if not found - return the default value */
513 typedef enum emit_type_
515 et_byte
, /* explicit number */
516 et_stream
, /* eaten character */
517 et_position
/* current position */
521 emit destination typedef
523 typedef enum emit_dest_
525 ed_output
, /* write to the output buffer */
526 ed_regbyte
/* write a particular regbyte */
534 emit_dest m_emit_dest
;
535 emit_type m_emit_type
; /* ed_output */
536 byte m_byte
; /* et_byte */
537 map_byte
*m_regbyte
; /* ed_regbyte */
538 byte
*m_regname
; /* ed_regbyte - temporary */
539 struct emit_
*m_next
;
542 static void emit_create (emit
**em
)
544 *em
= (emit
*) mem_alloc (sizeof (emit
));
547 (**em
).m_emit_dest
= ed_output
;
548 (**em
).m_emit_type
= et_byte
;
549 (**em
).m_byte
= '\0';
550 (**em
).m_regbyte
= NULL
;
551 (**em
).m_regname
= NULL
;
552 (**em
).m_next
= NULL
;
556 static void emit_destroy (emit
**em
)
560 emit_destroy (&(**em
).m_next
);
561 mem_free ((void **) &(**em
).m_regname
);
562 mem_free ((void **) em
);
566 static unsigned int emit_size (emit
*_E
)
572 if (_E
->m_emit_dest
== ed_output
)
574 if (_E
->m_emit_type
== et_position
)
575 _N
+= 4; /* position is a 32-bit unsigned integer */
585 static int emit_push (emit
*_E
, byte
*_P
, byte _C
, unsigned int _Pos
, regbyte_ctx
**_Ctx
)
589 if (_E
->m_emit_dest
== ed_output
)
591 if (_E
->m_emit_type
== et_byte
)
593 else if (_E
->m_emit_type
== et_stream
)
595 else /* _Em->type == et_position */
597 *_P
++ = (byte
) (_Pos
);
598 *_P
++ = (byte
) (_Pos
>> 8);
599 *_P
++ = (byte
) (_Pos
>> 16);
600 *_P
++ = (byte
) (_Pos
>> 24);
605 regbyte_ctx
*new_rbc
;
606 regbyte_ctx_create (&new_rbc
);
610 new_rbc
->m_prev
= *_Ctx
;
611 new_rbc
->m_regbyte
= _E
->m_regbyte
;
614 if (_E
->m_emit_type
== et_byte
)
615 new_rbc
->m_current_value
= _E
->m_byte
;
616 else if (_E
->m_emit_type
== et_stream
)
617 new_rbc
->m_current_value
= _C
;
629 typedef struct error_
633 struct rule_
*m_token
;
636 static void error_create (error
**er
)
638 *er
= (error
*) mem_alloc (sizeof (error
));
641 (**er
).m_text
= NULL
;
642 (**er
).m_token_name
= NULL
;
643 (**er
).m_token
= NULL
;
647 static void error_destroy (error
**er
)
651 mem_free ((void **) &(**er
).m_text
);
652 mem_free ((void **) &(**er
).m_token_name
);
653 mem_free ((void **) er
);
658 static byte
*error_get_token (error
*, struct dict_
*, const byte
*, unsigned int);
661 condition operand type typedef
663 typedef enum cond_oper_type_
665 cot_byte
, /* constant 8-bit unsigned integer */
666 cot_regbyte
/* pointer to byte register containing the current value */
670 condition operand typedef
672 typedef struct cond_oper_
674 cond_oper_type m_type
;
675 byte m_byte
; /* cot_byte */
676 map_byte
*m_regbyte
; /* cot_regbyte */
677 byte
*m_regname
; /* cot_regbyte - temporary */
681 condition type typedef
683 typedef enum cond_type_
695 cond_oper m_operands
[2];
698 static void cond_create (cond
**co
)
700 *co
= (cond
*) mem_alloc (sizeof (cond
));
703 (**co
).m_operands
[0].m_regname
= NULL
;
704 (**co
).m_operands
[1].m_regname
= NULL
;
708 static void cond_destroy (cond
**co
)
712 mem_free ((void **) &(**co
).m_operands
[0].m_regname
);
713 mem_free ((void **) &(**co
).m_operands
[1].m_regname
);
714 mem_free ((void **) co
);
719 specifier type typedef
721 typedef enum spec_type_
738 spec_type m_spec_type
;
739 byte m_byte
[2]; /* st_byte, st_byte_range */
740 byte
*m_string
; /* st_string */
741 struct rule_
*m_rule
; /* st_identifier, st_identifier_loop */
748 static void spec_create (spec
**sp
)
750 *sp
= (spec
*) mem_alloc (sizeof (spec
));
753 (**sp
).m_spec_type
= st_false
;
754 (**sp
).m_byte
[0] = '\0';
755 (**sp
).m_byte
[1] = '\0';
756 (**sp
).m_string
= NULL
;
757 (**sp
).m_rule
= NULL
;
758 (**sp
).m_emits
= NULL
;
759 (**sp
).m_errtext
= NULL
;
760 (**sp
).m_cond
= NULL
;
765 static void spec_destroy (spec
**sp
)
769 spec_destroy (&(**sp
).next
);
770 emit_destroy (&(**sp
).m_emits
);
771 error_destroy (&(**sp
).m_errtext
);
772 mem_free ((void **) &(**sp
).m_string
);
773 cond_destroy (&(**sp
).m_cond
);
774 mem_free ((void **) sp
);
778 GRAMMAR_IMPLEMENT_LIST_APPEND(spec
)
801 static void rule_create (rule
**ru
)
803 *ru
= (rule
*) mem_alloc (sizeof (rule
));
806 (**ru
).m_oper
= op_none
;
807 (**ru
).m_specs
= NULL
;
809 (**ru
).m_referenced
= 0;
813 static void rule_destroy (rule
**ru
)
817 rule_destroy (&(**ru
).next
);
818 spec_destroy (&(**ru
).m_specs
);
819 mem_free ((void **) ru
);
823 GRAMMAR_IMPLEMENT_LIST_APPEND(rule
)
826 returns unique grammar id
828 static grammar
next_valid_grammar_id (void)
830 static grammar id
= 0;
843 map_byte
*m_regbytes
;
848 static void dict_create (dict
**di
)
850 *di
= (dict
*) mem_alloc (sizeof (dict
));
853 (**di
).m_rulez
= NULL
;
854 (**di
).m_syntax
= NULL
;
855 (**di
).m_string
= NULL
;
856 (**di
).m_regbytes
= NULL
;
857 (**di
).m_id
= next_valid_grammar_id ();
862 static void dict_destroy (dict
**di
)
866 rule_destroy (&(**di
).m_rulez
);
867 map_byte_destroy (&(**di
).m_regbytes
);
868 mem_free ((void **) di
);
872 GRAMMAR_IMPLEMENT_LIST_APPEND(dict
)
874 static void dict_find (dict
**di
, grammar key
, dict
**data
)
878 if ((**di
).m_id
== key
)
890 static dict
*g_dicts
= NULL
;
895 typedef struct barray_
901 static void barray_create (barray
**ba
)
903 *ba
= (barray
*) mem_alloc (sizeof (barray
));
911 static void barray_destroy (barray
**ba
)
915 mem_free ((void **) &(**ba
).data
);
916 mem_free ((void **) ba
);
921 reallocates byte array to requested size,
922 returns 0 on success,
925 static int barray_resize (barray
**ba
, unsigned int nlen
)
931 mem_free ((void **) &(**ba
).data
);
939 new_pointer
= (byte
*) mem_realloc ((**ba
).data
, (**ba
).len
* sizeof (byte
),
940 nlen
* sizeof (byte
));
943 (**ba
).data
= new_pointer
;
954 adds byte array pointed by *nb to the end of array pointed by *ba,
955 returns 0 on success,
958 static int barray_append (barray
**ba
, barray
**nb
)
960 const unsigned int len
= (**ba
).len
;
962 if (barray_resize (ba
, (**ba
).len
+ (**nb
).len
))
965 mem_copy ((**ba
).data
+ len
, (**nb
).data
, (**nb
).len
);
971 adds emit chain pointed by em to the end of array pointed by *ba,
972 returns 0 on success,
975 static int barray_push (barray
**ba
, emit
*em
, byte c
, unsigned int pos
, regbyte_ctx
**rbc
)
977 unsigned int count
= emit_size (em
);
979 if (barray_resize (ba
, (**ba
).len
+ count
))
982 return emit_push (em
, (**ba
).data
+ ((**ba
).len
- count
), c
, pos
, rbc
);
988 typedef struct bytepool_
994 static void bytepool_destroy (bytepool
**by
)
998 mem_free ((void **) &(**by
)._F
);
999 mem_free ((void **) by
);
1003 static void bytepool_create (bytepool
**by
, int len
)
1005 *by
= (bytepool
*) (mem_alloc (sizeof (bytepool
)));
1008 (**by
)._F
= (byte
*) (mem_alloc (sizeof (byte
) * len
));
1011 if ((**by
)._F
== NULL
)
1012 bytepool_destroy (by
);
1016 static int bytepool_reserve (bytepool
*by
, unsigned int _N
)
1023 /* byte pool can only grow and at least by doubling its size */
1024 _N
= _N
>= by
->_Siz
* 2 ? _N
: by
->_Siz
* 2;
1026 /* reallocate the memory and adjust pointers to the new memory location */
1027 _P
= (byte
*) (mem_realloc (by
->_F
, sizeof (byte
) * by
->_Siz
, sizeof (byte
) * _N
));
1039 string to string map typedef
1041 typedef struct map_str_
1045 struct map_str_
*next
;
1048 static void map_str_create (map_str
**ma
)
1050 *ma
= (map_str
*) mem_alloc (sizeof (map_str
));
1059 static void map_str_destroy (map_str
**ma
)
1063 map_str_destroy (&(**ma
).next
);
1064 mem_free ((void **) &(**ma
).key
);
1065 mem_free ((void **) &(**ma
).data
);
1066 mem_free ((void **) ma
);
1070 GRAMMAR_IMPLEMENT_LIST_APPEND(map_str
)
1073 searches the map for specified key,
1074 if the key is matched, *data is filled with data associated with the key,
1075 returns 0 if the key is matched,
1078 static int map_str_find (map_str
**ma
, const byte
*key
, byte
**data
)
1082 if (str_equal ((**ma
).key
, key
))
1084 *data
= str_duplicate ((**ma
).data
);
1094 set_last_error (UNRESOLVED_REFERENCE
, str_duplicate (key
), -1);
1099 string to rule map typedef
1101 typedef struct map_rule_
1105 struct map_rule_
*next
;
1108 static void map_rule_create (map_rule
**ma
)
1110 *ma
= (map_rule
*) mem_alloc (sizeof (map_rule
));
1119 static void map_rule_destroy (map_rule
**ma
)
1123 map_rule_destroy (&(**ma
).next
);
1124 mem_free ((void **) &(**ma
).key
);
1125 mem_free ((void **) ma
);
1129 GRAMMAR_IMPLEMENT_LIST_APPEND(map_rule
)
1132 searches the map for specified key,
1133 if the key is matched, *data is filled with data associated with the key,
1134 returns 0 if the key is matched,
1137 static int map_rule_find (map_rule
**ma
, const byte
*key
, rule
**data
)
1141 if (str_equal ((**ma
).key
, key
))
1143 *data
= (**ma
).data
;
1151 set_last_error (UNRESOLVED_REFERENCE
, str_duplicate (key
), -1);
1156 returns 1 if given character is a white space,
1159 static int is_space (byte c
)
1161 return c
== ' ' || c
== '\t' || c
== '\n' || c
== '\r';
1165 advances text pointer by 1 if character pointed by *text is a space,
1166 returns 1 if a space has been eaten,
1169 static int eat_space (const byte
**text
)
1171 if (is_space (**text
))
1182 returns 1 if text points to C-style comment start string,
1185 static int is_comment_start (const byte
*text
)
1187 return text
[0] == '/' && text
[1] == '*';
1191 advances text pointer to first character after C-style comment block - if any,
1192 returns 1 if C-style comment block has been encountered and eaten,
1195 static int eat_comment (const byte
**text
)
1197 if (is_comment_start (*text
))
1199 /* *text points to comment block - skip two characters to enter comment body */
1201 /* skip any character except consecutive '*' and '/' */
1202 while (!((*text
)[0] == '*' && (*text
)[1] == '/'))
1204 /* skip those two terminating characters */
1214 advances text pointer to first character that is neither space nor C-style comment block
1216 static void eat_spaces (const byte
**text
)
1218 while (eat_space (text
) || eat_comment (text
))
1223 resizes string pointed by *ptr to successfully add character c to the end of the string,
1224 returns 0 on success,
1227 static int string_grow (byte
**ptr
, unsigned int *len
, byte c
)
1229 /* reallocate the string in 16-byte increments */
1230 if ((*len
& 0x0F) == 0x0F || *ptr
== NULL
)
1232 byte
*tmp
= (byte
*) mem_realloc (*ptr
, ((*len
+ 1) & ~0x0F) * sizeof (byte
),
1233 ((*len
+ 1 + 0x10) & ~0x0F) * sizeof (byte
));
1242 /* append given character */
1246 (*ptr
)[*len
] = '\0';
1252 returns 1 if given character is a valid identifier character a-z, A-Z, 0-9 or _
1255 static int is_identifier (byte c
)
1257 return (c
>= 'a' && c
<= 'z') || (c
>= 'A' && c
<= 'Z') || (c
>= '0' && c
<= '9') || c
== '_';
1261 copies characters from *text to *id until non-identifier character is encountered,
1262 assumes that *id points to NULL object - caller is responsible for later freeing the string,
1263 text pointer is advanced to point past the copied identifier,
1264 returns 0 if identifier was successfully copied,
1267 static int get_identifier (const byte
**text
, byte
**id
)
1269 const byte
*t
= *text
;
1271 unsigned int len
= 0;
1273 if (string_grow (&p
, &len
, '\0'))
1276 /* loop while next character in buffer is valid for identifiers */
1277 while (is_identifier (*t
))
1279 if (string_grow (&p
, &len
, *t
++))
1281 mem_free ((void **) (void *) &p
);
1293 converts sequence of DEC digits pointed by *text until non-DEC digit is encountered,
1294 advances text pointer past the converted sequence,
1295 returns the converted value
1297 static unsigned int dec_convert (const byte
**text
)
1299 unsigned int value
= 0;
1301 while (**text
>= '0' && **text
<= '9')
1303 value
= value
* 10 + **text
- '0';
1311 returns 1 if given character is HEX digit 0-9, A-F or a-f,
1314 static int is_hex (byte c
)
1316 return (c
>= '0' && c
<= '9') || (c
>= 'A' && c
<= 'F') || (c
>= 'a' && c
<= 'f');
1320 returns value of passed character as if it was HEX digit
1322 static unsigned int hex2dec (byte c
)
1324 if (c
>= '0' && c
<= '9')
1326 if (c
>= 'A' && c
<= 'F')
1327 return c
- 'A' + 10;
1328 return c
- 'a' + 10;
1332 converts sequence of HEX digits pointed by *text until non-HEX digit is encountered,
1333 advances text pointer past the converted sequence,
1334 returns the converted value
1336 static unsigned int hex_convert (const byte
**text
)
1338 unsigned int value
= 0;
1340 while (is_hex (**text
))
1342 value
= value
* 0x10 + hex2dec (**text
);
1350 returns 1 if given character is OCT digit 0-7,
1353 static int is_oct (byte c
)
1355 return c
>= '0' && c
<= '7';
1359 returns value of passed character as if it was OCT digit
1361 static int oct2dec (byte c
)
1366 static byte
get_escape_sequence (const byte
**text
)
1370 /* skip '\' character */
1398 return (byte
) hex_convert (text
);
1402 if (is_oct (**text
))
1404 value
= oct2dec (*(*text
)++);
1405 if (is_oct (**text
))
1407 value
= value
* 010 + oct2dec (*(*text
)++);
1408 if (is_oct (**text
))
1409 value
= value
* 010 + oct2dec (*(*text
)++);
1413 return (byte
) value
;
1417 copies characters from *text to *str until " or ' character is encountered,
1418 assumes that *str points to NULL object - caller is responsible for later freeing the string,
1419 assumes that *text points to " or ' character that starts the string,
1420 text pointer is advanced to point past the " or ' character,
1421 returns 0 if string was successfully copied,
1424 static int get_string (const byte
**text
, byte
**str
)
1426 const byte
*t
= *text
;
1428 unsigned int len
= 0;
1431 if (string_grow (&p
, &len
, '\0'))
1434 /* read " or ' character that starts the string */
1436 /* while next character is not the terminating character */
1437 while (*t
&& *t
!= term_char
)
1442 c
= get_escape_sequence (&t
);
1446 if (string_grow (&p
, &len
, c
))
1448 mem_free ((void **) (void *) &p
);
1452 /* skip " or ' character that ends the string */
1461 gets emit code, the syntax is:
1462 ".emtcode" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
1463 assumes that *text already points to <symbol>,
1464 returns 0 if emit code is successfully read,
1467 static int get_emtcode (const byte
**text
, map_byte
**ma
)
1469 const byte
*t
= *text
;
1472 map_byte_create (&m
);
1476 if (get_identifier (&t
, &m
->key
))
1478 map_byte_destroy (&m
);
1487 if (get_string (&t
, &c
))
1489 map_byte_destroy (&m
);
1493 m
->data
= (byte
) c
[0];
1494 mem_free ((void **) (void *) &c
);
1496 else if (t
[0] == '0' && (t
[1] == 'x' || t
[1] == 'X'))
1498 /* skip HEX "0x" or "0X" prefix */
1500 m
->data
= (byte
) hex_convert (&t
);
1504 m
->data
= (byte
) dec_convert (&t
);
1515 gets regbyte declaration, the syntax is:
1516 ".regbyte" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
1517 assumes that *text already points to <symbol>,
1518 returns 0 if regbyte is successfully read,
1521 static int get_regbyte (const byte
**text
, map_byte
**ma
)
1523 /* pass it to the emtcode parser as it has the same syntax starting at <symbol> */
1524 return get_emtcode (text
, ma
);
1528 returns 0 on success,
1531 static int get_errtext (const byte
**text
, map_str
**ma
)
1533 const byte
*t
= *text
;
1536 map_str_create (&m
);
1540 if (get_identifier (&t
, &m
->key
))
1542 map_str_destroy (&m
);
1547 if (get_string (&t
, &m
->data
))
1549 map_str_destroy (&m
);
1560 returns 0 on success,
1561 returns 1 otherwise,
1563 static int get_error (const byte
**text
, error
**er
, map_str
*maps
)
1565 const byte
*t
= *text
;
1572 if (get_identifier (&t
, &temp
))
1576 if (!str_equal ((byte
*) "error", temp
))
1578 mem_free ((void **) (void *) &temp
);
1582 mem_free ((void **) (void *) &temp
);
1590 if (get_string (&t
, &(**er
).m_text
))
1599 if (get_identifier (&t
, &temp
))
1606 if (map_str_find (&maps
, temp
, &(**er
).m_text
))
1608 mem_free ((void **) (void *) &temp
);
1613 mem_free ((void **) (void *) &temp
);
1616 /* try to extract "token" from "...$token$..." */
1618 byte
*processed
= NULL
;
1619 unsigned int len
= 0, i
= 0;
1621 if (string_grow (&processed
, &len
, '\0'))
1627 while (i
< str_length ((**er
).m_text
))
1629 /* check if the dollar sign is repeated - if so skip it */
1630 if ((**er
).m_text
[i
] == '$' && (**er
).m_text
[i
+ 1] == '$')
1632 if (string_grow (&processed
, &len
, '$'))
1634 mem_free ((void **) (void *) &processed
);
1641 else if ((**er
).m_text
[i
] != '$')
1643 if (string_grow (&processed
, &len
, (**er
).m_text
[i
]))
1645 mem_free ((void **) (void *) &processed
);
1654 if (string_grow (&processed
, &len
, '$'))
1656 mem_free ((void **) (void *) &processed
);
1662 /* length of token being extracted */
1663 unsigned int tlen
= 0;
1665 if (string_grow (&(**er
).m_token_name
, &tlen
, '\0'))
1667 mem_free ((void **) (void *) &processed
);
1672 /* skip the dollar sign */
1675 while ((**er
).m_text
[i
] != '$')
1677 if (string_grow (&(**er
).m_token_name
, &tlen
, (**er
).m_text
[i
]))
1679 mem_free ((void **) (void *) &processed
);
1687 /* skip the dollar sign */
1693 mem_free ((void **) &(**er
).m_text
);
1694 (**er
).m_text
= processed
;
1702 returns 0 on success,
1703 returns 1 otherwise,
1705 static int get_emits (const byte
**text
, emit
**em
, map_byte
*mapb
)
1707 const byte
*t
= *text
;
1716 if (get_identifier (&t
, &temp
))
1721 if (str_equal ((byte
*) "emit", temp
))
1724 else if (str_equal ((byte
*) "load", temp
))
1728 mem_free ((void **) (void *) &temp
);
1732 mem_free ((void **) (void *) &temp
);
1738 e
->m_emit_dest
= dest
;
1740 if (dest
== ed_regbyte
)
1742 if (get_identifier (&t
, &e
->m_regname
))
1751 if (*t
== '0' && (t
[1] == 'x' || t
[1] == 'X'))
1754 e
->m_byte
= (byte
) hex_convert (&t
);
1756 e
->m_emit_type
= et_byte
;
1759 else if (*t
>= '0' && *t
<= '9')
1761 e
->m_byte
= (byte
) dec_convert (&t
);
1763 e
->m_emit_type
= et_byte
;
1770 e
->m_emit_type
= et_stream
;
1777 e
->m_emit_type
= et_position
;
1780 else if (*t
== '\'')
1782 if (get_string (&t
, &temp
))
1787 e
->m_byte
= (byte
) temp
[0];
1789 mem_free ((void **) (void *) &temp
);
1791 e
->m_emit_type
= et_byte
;
1795 if (get_identifier (&t
, &temp
))
1801 if (map_byte_find (&mapb
, temp
, &e
->m_byte
))
1803 mem_free ((void **) (void *) &temp
);
1808 mem_free ((void **) (void *) &temp
);
1810 e
->m_emit_type
= et_byte
;
1815 if (get_emits (&t
, &e
->m_next
, mapb
))
1827 returns 0 on success,
1828 returns 1 otherwise,
1830 static int get_spec (const byte
**text
, spec
**sp
, map_str
*maps
, map_byte
*mapb
)
1832 const byte
*t
= *text
;
1839 /* first - read optional .if statement */
1843 byte
*keyword
= NULL
;
1848 if (get_identifier (&u
, &keyword
))
1855 if (str_equal ((byte
*) "if", keyword
))
1857 cond_create (&s
->m_cond
);
1858 if (s
->m_cond
== NULL
)
1864 /* skip the left paren */
1868 /* get the left operand */
1870 if (get_identifier (&u
, &s
->m_cond
->m_operands
[0].m_regname
))
1875 s
->m_cond
->m_operands
[0].m_type
= cot_regbyte
;
1877 /* get the operator (!= or ==) */
1880 s
->m_cond
->m_type
= ct_not_equal
;
1882 s
->m_cond
->m_type
= ct_equal
;
1886 if (u
[0] == '0' && (u
[1] == 'x' || u
[1] == 'X'))
1888 /* skip the 0x prefix */
1891 /* get the right operand */
1892 s
->m_cond
->m_operands
[1].m_byte
= hex_convert (&u
);
1893 s
->m_cond
->m_operands
[1].m_type
= cot_byte
;
1895 else /*if (*u >= '0' && *u <= '9')*/
1897 /* get the right operand */
1898 s
->m_cond
->m_operands
[1].m_byte
= dec_convert (&u
);
1899 s
->m_cond
->m_operands
[1].m_type
= cot_byte
;
1902 /* skip the right paren */
1911 mem_free ((void **) (void *) &keyword
);
1918 if (get_string (&t
, &temp
))
1929 /* skip the '-' character */
1933 if (get_string (&t
, &temp2
))
1935 mem_free ((void **) (void *) &temp
);
1941 s
->m_spec_type
= st_byte_range
;
1942 s
->m_byte
[0] = *temp
;
1943 s
->m_byte
[1] = *temp2
;
1945 mem_free ((void **) (void *) &temp2
);
1949 s
->m_spec_type
= st_byte
;
1953 mem_free ((void **) (void *) &temp
);
1957 if (get_string (&t
, &s
->m_string
))
1964 s
->m_spec_type
= st_string
;
1968 byte
*keyword
= NULL
;
1973 if (get_identifier (&t
, &keyword
))
1981 if (str_equal ((byte
*) "true", keyword
))
1983 s
->m_spec_type
= st_true
;
1986 else if (str_equal ((byte
*) "false", keyword
))
1988 s
->m_spec_type
= st_false
;
1991 else if (str_equal ((byte
*) "debug", keyword
))
1993 s
->m_spec_type
= st_debug
;
1996 else if (str_equal ((byte
*) "loop", keyword
))
1998 if (get_identifier (&t
, &s
->m_string
))
2000 mem_free ((void **) (void *) &keyword
);
2006 s
->m_spec_type
= st_identifier_loop
;
2008 mem_free ((void **) (void *) &keyword
);
2012 if (get_identifier (&t
, &s
->m_string
))
2019 s
->m_spec_type
= st_identifier
;
2022 if (get_error (&t
, &s
->m_errtext
, maps
))
2028 if (get_emits (&t
, &s
->m_emits
, mapb
))
2040 returns 0 on success,
2041 returns 1 otherwise,
2043 static int get_rule (const byte
**text
, rule
**ru
, map_str
*maps
, map_byte
*mapb
)
2045 const byte
*t
= *text
;
2052 if (get_spec (&t
, &r
->m_specs
, maps
, mapb
))
2063 /* skip the dot that precedes "and" or "or" */
2066 /* read "and" or "or" keyword */
2067 if (get_identifier (&t
, &op
))
2074 if (r
->m_oper
== op_none
)
2077 if (str_equal ((byte
*) "and", op
))
2084 mem_free ((void **) (void *) &op
);
2086 if (get_spec (&t
, &sp
, maps
, mapb
))
2092 spec_append (&r
->m_specs
, sp
);
2095 /* skip the semicolon */
2105 returns 0 on success,
2106 returns 1 otherwise,
2108 static int update_dependency (map_rule
*mapr
, byte
*symbol
, rule
**ru
)
2110 if (map_rule_find (&mapr
, symbol
, ru
))
2113 (**ru
).m_referenced
= 1;
2119 returns 0 on success,
2120 returns 1 otherwise,
2122 static int update_dependencies (dict
*di
, map_rule
*mapr
, byte
**syntax_symbol
,
2123 byte
**string_symbol
, map_byte
*regbytes
)
2125 rule
*rulez
= di
->m_rulez
;
2127 /* update dependecies for the root and lexer symbols */
2128 if (update_dependency (mapr
, *syntax_symbol
, &di
->m_syntax
) ||
2129 (*string_symbol
!= NULL
&& update_dependency (mapr
, *string_symbol
, &di
->m_string
)))
2132 mem_free ((void **) syntax_symbol
);
2133 mem_free ((void **) string_symbol
);
2135 /* update dependecies for the rest of the rules */
2138 spec
*sp
= rulez
->m_specs
;
2140 /* iterate through all the specifiers */
2143 /* update dependency for identifier */
2144 if (sp
->m_spec_type
== st_identifier
|| sp
->m_spec_type
== st_identifier_loop
)
2146 if (update_dependency (mapr
, sp
->m_string
, &sp
->m_rule
))
2149 mem_free ((void **) &sp
->m_string
);
2152 /* some errtexts reference to a rule */
2153 if (sp
->m_errtext
&& sp
->m_errtext
->m_token_name
)
2155 if (update_dependency (mapr
, sp
->m_errtext
->m_token_name
, &sp
->m_errtext
->m_token
))
2158 mem_free ((void **) &sp
->m_errtext
->m_token_name
);
2161 /* update dependency for condition */
2165 for (i
= 0; i
< 2; i
++)
2166 if (sp
->m_cond
->m_operands
[i
].m_type
== cot_regbyte
)
2168 sp
->m_cond
->m_operands
[i
].m_regbyte
= map_byte_locate (®bytes
,
2169 sp
->m_cond
->m_operands
[i
].m_regname
);
2171 if (sp
->m_cond
->m_operands
[i
].m_regbyte
== NULL
)
2174 mem_free ((void **) &sp
->m_cond
->m_operands
[i
].m_regname
);
2178 /* update dependency for all .load instructions */
2181 emit
*em
= sp
->m_emits
;
2184 if (em
->m_emit_dest
== ed_regbyte
)
2186 em
->m_regbyte
= map_byte_locate (®bytes
, em
->m_regname
);
2188 if (em
->m_regbyte
== NULL
)
2191 mem_free ((void **) &em
->m_regname
);
2201 rulez
= rulez
->next
;
2204 /* check for unreferenced symbols */
2205 rulez
= di
->m_rulez
;
2206 while (rulez
!= NULL
)
2208 if (!rulez
->m_referenced
)
2210 map_rule
*ma
= mapr
;
2213 if (ma
->data
== rulez
)
2215 set_last_error (UNREFERENCED_IDENTIFIER
, str_duplicate (ma
->key
), -1);
2221 rulez
= rulez
->next
;
2227 static int satisfies_condition (cond
*co
, regbyte_ctx
*ctx
)
2235 for (i
= 0; i
< 2; i
++)
2236 switch (co
->m_operands
[i
].m_type
)
2239 values
[i
] = co
->m_operands
[i
].m_byte
;
2242 values
[i
] = regbyte_ctx_extract (&ctx
, co
->m_operands
[i
].m_regbyte
);
2249 return values
[0] == values
[1];
2251 return values
[0] != values
[1];
2257 static void free_regbyte_ctx_stack (regbyte_ctx
*top
, regbyte_ctx
*limit
)
2259 while (top
!= limit
)
2261 regbyte_ctx
*rbc
= top
->m_prev
;
2262 regbyte_ctx_destroy (&top
);
/*
    Outcome of matching a rule against the input, as returned by match() and fast_match().
*/
typedef enum match_result_
{
    mr_not_matched,     /* the examined string does not match */
    mr_matched,         /* the examined string matches */
    mr_error_raised,    /* mr_not_matched + error has been raised */
    mr_dont_emit,       /* used by identifier loops only */
    mr_internal_error   /* an internal error has occurred such as out of memory */
} match_result;
2277 This function does the main job. It parses the text and generates output data.
2279 static match_result
match (dict
*di
, const byte
*text
, unsigned int *index
, rule
*ru
, barray
**ba
,
2280 int filtering_string
, regbyte_ctx
**rbc
)
2282 unsigned int ind
= *index
;
2283 match_result status
= mr_not_matched
;
2284 spec
*sp
= ru
->m_specs
;
2285 regbyte_ctx
*ctx
= *rbc
;
2287 /* for every specifier in the rule */
2290 unsigned int i
, len
, save_ind
= ind
;
2291 barray
*array
= NULL
;
2293 if (satisfies_condition (sp
->m_cond
, ctx
))
2295 switch (sp
->m_spec_type
)
2298 barray_create (&array
);
2301 free_regbyte_ctx_stack (ctx
, *rbc
);
2302 return mr_internal_error
;
2305 status
= match (di
, text
, &ind
, sp
->m_rule
, &array
, filtering_string
, &ctx
);
2307 if (status
== mr_internal_error
)
2309 free_regbyte_ctx_stack (ctx
, *rbc
);
2310 barray_destroy (&array
);
2311 return mr_internal_error
;
2315 len
= str_length (sp
->m_string
);
2317 /* prefilter the stream */
2318 if (!filtering_string
&& di
->m_string
)
2321 unsigned int filter_index
= 0;
2322 match_result result
;
2323 regbyte_ctx
*null_ctx
= NULL
;
2325 barray_create (&ba
);
2328 free_regbyte_ctx_stack (ctx
, *rbc
);
2329 return mr_internal_error
;
2332 result
= match (di
, text
+ ind
, &filter_index
, di
->m_string
, &ba
, 1, &null_ctx
);
2334 if (result
== mr_internal_error
)
2336 free_regbyte_ctx_stack (ctx
, *rbc
);
2337 barray_destroy (&ba
);
2338 return mr_internal_error
;
2341 if (result
!= mr_matched
)
2343 barray_destroy (&ba
);
2344 status
= mr_not_matched
;
2348 barray_destroy (&ba
);
2350 if (filter_index
!= len
|| !str_equal_n (sp
->m_string
, text
+ ind
, len
))
2352 status
= mr_not_matched
;
2356 status
= mr_matched
;
2361 status
= mr_matched
;
2362 for (i
= 0; status
== mr_matched
&& i
< len
; i
++)
2363 if (text
[ind
+ i
] != sp
->m_string
[i
])
2364 status
= mr_not_matched
;
2366 if (status
== mr_matched
)
2371 status
= text
[ind
] == *sp
->m_byte
? mr_matched
: mr_not_matched
;
2372 if (status
== mr_matched
)
2376 status
= (text
[ind
] >= sp
->m_byte
[0] && text
[ind
] <= sp
->m_byte
[1]) ?
2377 mr_matched
: mr_not_matched
;
2378 if (status
== mr_matched
)
2382 status
= mr_matched
;
2385 status
= mr_not_matched
;
2388 status
= ru
->m_oper
== op_and
? mr_matched
: mr_not_matched
;
2390 case st_identifier_loop
:
2391 barray_create (&array
);
2394 free_regbyte_ctx_stack (ctx
, *rbc
);
2395 return mr_internal_error
;
2398 status
= mr_dont_emit
;
2401 match_result result
;
2404 result
= match (di
, text
, &ind
, sp
->m_rule
, &array
, filtering_string
, &ctx
);
2406 if (result
== mr_error_raised
)
2411 else if (result
== mr_matched
)
2413 if (barray_push (ba
, sp
->m_emits
, text
[ind
- 1], save_ind
, &ctx
) ||
2414 barray_append (ba
, &array
))
2416 free_regbyte_ctx_stack (ctx
, *rbc
);
2417 barray_destroy (&array
);
2418 return mr_internal_error
;
2420 barray_destroy (&array
);
2421 barray_create (&array
);
2424 free_regbyte_ctx_stack (ctx
, *rbc
);
2425 return mr_internal_error
;
2428 else if (result
== mr_internal_error
)
2430 free_regbyte_ctx_stack (ctx
, *rbc
);
2431 barray_destroy (&array
);
2432 return mr_internal_error
;
2442 status
= mr_not_matched
;
2445 if (status
== mr_error_raised
)
2447 free_regbyte_ctx_stack (ctx
, *rbc
);
2448 barray_destroy (&array
);
2450 return mr_error_raised
;
2453 if (ru
->m_oper
== op_and
&& status
!= mr_matched
&& status
!= mr_dont_emit
)
2455 free_regbyte_ctx_stack (ctx
, *rbc
);
2456 barray_destroy (&array
);
2460 set_last_error (sp
->m_errtext
->m_text
, error_get_token (sp
->m_errtext
, di
, text
,
2463 return mr_error_raised
;
2466 return mr_not_matched
;
2469 if (status
== mr_matched
)
2472 if (barray_push (ba
, sp
->m_emits
, text
[ind
- 1], save_ind
, &ctx
))
2474 free_regbyte_ctx_stack (ctx
, *rbc
);
2475 barray_destroy (&array
);
2476 return mr_internal_error
;
2480 if (barray_append (ba
, &array
))
2482 free_regbyte_ctx_stack (ctx
, *rbc
);
2483 barray_destroy (&array
);
2484 return mr_internal_error
;
2488 barray_destroy (&array
);
2490 /* if the rule operator is a logical or, we pick up the first matching specifier */
2491 if (ru
->m_oper
== op_or
&& (status
== mr_matched
|| status
== mr_dont_emit
))
2501 /* everything went fine - all specifiers match up */
2502 if (ru
->m_oper
== op_and
&& (status
== mr_matched
|| status
== mr_dont_emit
))
2509 free_regbyte_ctx_stack (ctx
, *rbc
);
2510 return mr_not_matched
;
2513 static match_result
fast_match (dict
*di
, const byte
*text
, unsigned int *index
, rule
*ru
, int *_PP
, bytepool
*_BP
,
2514 int filtering_string
, regbyte_ctx
**rbc
)
2516 unsigned int ind
= *index
;
2517 int _P
= filtering_string
? 0 : *_PP
;
2519 match_result status
= mr_not_matched
;
2520 spec
*sp
= ru
->m_specs
;
2521 regbyte_ctx
*ctx
= *rbc
;
2523 /* for every specifier in the rule */
2526 unsigned int i
, len
, save_ind
= ind
;
2528 _P2
= _P
+ (sp
->m_emits
? emit_size (sp
->m_emits
) : 0);
2529 if (bytepool_reserve (_BP
, _P2
))
2531 free_regbyte_ctx_stack (ctx
, *rbc
);
2532 return mr_internal_error
;
2535 if (satisfies_condition (sp
->m_cond
, ctx
))
2537 switch (sp
->m_spec_type
)
2540 status
= fast_match (di
, text
, &ind
, sp
->m_rule
, &_P2
, _BP
, filtering_string
, &ctx
);
2542 if (status
== mr_internal_error
)
2544 free_regbyte_ctx_stack (ctx
, *rbc
);
2545 return mr_internal_error
;
2549 len
= str_length (sp
->m_string
);
2551 /* prefilter the stream */
2552 if (!filtering_string
&& di
->m_string
)
2554 unsigned int filter_index
= 0;
2555 match_result result
;
2556 regbyte_ctx
*null_ctx
= NULL
;
2558 result
= fast_match (di
, text
+ ind
, &filter_index
, di
->m_string
, NULL
, _BP
, 1, &null_ctx
);
2560 if (result
== mr_internal_error
)
2562 free_regbyte_ctx_stack (ctx
, *rbc
);
2563 return mr_internal_error
;
2566 if (result
!= mr_matched
)
2568 status
= mr_not_matched
;
2572 if (filter_index
!= len
|| !str_equal_n (sp
->m_string
, text
+ ind
, len
))
2574 status
= mr_not_matched
;
2578 status
= mr_matched
;
2583 status
= mr_matched
;
2584 for (i
= 0; status
== mr_matched
&& i
< len
; i
++)
2585 if (text
[ind
+ i
] != sp
->m_string
[i
])
2586 status
= mr_not_matched
;
2588 if (status
== mr_matched
)
2593 status
= text
[ind
] == *sp
->m_byte
? mr_matched
: mr_not_matched
;
2594 if (status
== mr_matched
)
2598 status
= (text
[ind
] >= sp
->m_byte
[0] && text
[ind
] <= sp
->m_byte
[1]) ?
2599 mr_matched
: mr_not_matched
;
2600 if (status
== mr_matched
)
2604 status
= mr_matched
;
2607 status
= mr_not_matched
;
2610 status
= ru
->m_oper
== op_and
? mr_matched
: mr_not_matched
;
2612 case st_identifier_loop
:
2613 status
= mr_dont_emit
;
2616 match_result result
;
2619 result
= fast_match (di
, text
, &ind
, sp
->m_rule
, &_P2
, _BP
, filtering_string
, &ctx
);
2621 if (result
== mr_error_raised
)
2626 else if (result
== mr_matched
)
2628 if (!filtering_string
)
2630 if (sp
->m_emits
!= NULL
)
2632 if (emit_push (sp
->m_emits
, _BP
->_F
+ _P
, text
[ind
- 1], save_ind
, &ctx
))
2634 free_regbyte_ctx_stack (ctx
, *rbc
);
2635 return mr_internal_error
;
2640 _P2
+= sp
->m_emits
? emit_size (sp
->m_emits
) : 0;
2641 if (bytepool_reserve (_BP
, _P2
))
2643 free_regbyte_ctx_stack (ctx
, *rbc
);
2644 return mr_internal_error
;
2648 else if (result
== mr_internal_error
)
2650 free_regbyte_ctx_stack (ctx
, *rbc
);
2651 return mr_internal_error
;
2661 status
= mr_not_matched
;
2664 if (status
== mr_error_raised
)
2666 free_regbyte_ctx_stack (ctx
, *rbc
);
2668 return mr_error_raised
;
2671 if (ru
->m_oper
== op_and
&& status
!= mr_matched
&& status
!= mr_dont_emit
)
2673 free_regbyte_ctx_stack (ctx
, *rbc
);
2677 set_last_error (sp
->m_errtext
->m_text
, error_get_token (sp
->m_errtext
, di
, text
,
2680 return mr_error_raised
;
2683 return mr_not_matched
;
2686 if (status
== mr_matched
)
2688 if (sp
->m_emits
!= NULL
)
2689 if (emit_push (sp
->m_emits
, _BP
->_F
+ _P
, text
[ind
- 1], save_ind
, &ctx
))
2691 free_regbyte_ctx_stack (ctx
, *rbc
);
2692 return mr_internal_error
;
2698 /* if the rule operator is a logical or, we pick up the first matching specifier */
2699 if (ru
->m_oper
== op_or
&& (status
== mr_matched
|| status
== mr_dont_emit
))
2703 if (!filtering_string
)
2711 /* everything went fine - all specifiers match up */
2712 if (ru
->m_oper
== op_and
&& (status
== mr_matched
|| status
== mr_dont_emit
))
2716 if (!filtering_string
)
2721 free_regbyte_ctx_stack (ctx
, *rbc
);
2722 return mr_not_matched
;
2725 static byte
*error_get_token (error
*er
, dict
*di
, const byte
*text
, unsigned int ind
)
2732 unsigned int filter_index
= 0;
2733 regbyte_ctx
*ctx
= NULL
;
2735 barray_create (&ba
);
2738 if (match (di
, text
+ ind
, &filter_index
, er
->m_token
, &ba
, 0, &ctx
) == mr_matched
&&
2741 str
= (byte
*) mem_alloc (filter_index
+ 1);
2744 str_copy_n (str
, text
+ ind
, filter_index
);
2745 str
[filter_index
] = '\0';
2748 barray_destroy (&ba
);
2755 typedef struct grammar_load_state_
2758 byte
*syntax_symbol
;
2759 byte
*string_symbol
;
2763 } grammar_load_state
;
2765 static void grammar_load_state_create (grammar_load_state
**gr
)
2767 *gr
= (grammar_load_state
*) mem_alloc (sizeof (grammar_load_state
));
2771 (**gr
).syntax_symbol
= NULL
;
2772 (**gr
).string_symbol
= NULL
;
2779 static void grammar_load_state_destroy (grammar_load_state
**gr
)
2783 dict_destroy (&(**gr
).di
);
2784 mem_free ((void **) &(**gr
).syntax_symbol
);
2785 mem_free ((void **) &(**gr
).string_symbol
);
2786 map_str_destroy (&(**gr
).maps
);
2787 map_byte_destroy (&(**gr
).mapb
);
2788 map_rule_destroy (&(**gr
).mapr
);
2789 mem_free ((void **) gr
);
2797 grammar
grammar_load_from_text (const byte
*text
)
2799 grammar_load_state
*g
= NULL
;
2802 clear_last_error ();
2804 grammar_load_state_create (&g
);
2808 dict_create (&g
->di
);
2811 grammar_load_state_destroy (&g
);
2817 /* skip ".syntax" keyword */
2821 /* retrieve root symbol */
2822 if (get_identifier (&text
, &g
->syntax_symbol
))
2824 grammar_load_state_destroy (&g
);
2829 /* skip semicolon */
2835 byte
*symbol
= NULL
;
2836 int is_dot
= *text
== '.';
2841 if (get_identifier (&text
, &symbol
))
2843 grammar_load_state_destroy (&g
);
2849 if (is_dot
&& str_equal (symbol
, (byte
*) "emtcode"))
2851 map_byte
*ma
= NULL
;
2853 mem_free ((void **) (void *) &symbol
);
2855 if (get_emtcode (&text
, &ma
))
2857 grammar_load_state_destroy (&g
);
2861 map_byte_append (&g
->mapb
, ma
);
2864 else if (is_dot
&& str_equal (symbol
, (byte
*) "regbyte"))
2866 map_byte
*ma
= NULL
;
2868 mem_free ((void **) (void *) &symbol
);
2870 if (get_regbyte (&text
, &ma
))
2872 grammar_load_state_destroy (&g
);
2876 map_byte_append (&g
->di
->m_regbytes
, ma
);
2879 else if (is_dot
&& str_equal (symbol
, (byte
*) "errtext"))
2883 mem_free ((void **) (void *) &symbol
);
2885 if (get_errtext (&text
, &ma
))
2887 grammar_load_state_destroy (&g
);
2891 map_str_append (&g
->maps
, ma
);
2894 else if (is_dot
&& str_equal (symbol
, (byte
*) "string"))
2896 mem_free ((void **) (void *) &symbol
);
2898 if (g
->di
->m_string
!= NULL
)
2900 grammar_load_state_destroy (&g
);
2904 if (get_identifier (&text
, &g
->string_symbol
))
2906 grammar_load_state_destroy (&g
);
2910 /* skip semicolon */
2918 map_rule
*ma
= NULL
;
2920 if (get_rule (&text
, &ru
, g
->maps
, g
->mapb
))
2922 grammar_load_state_destroy (&g
);
2926 rule_append (&g
->di
->m_rulez
, ru
);
2928 /* if a rule consist of only one specifier, give it an ".and" operator */
2929 if (ru
->m_oper
== op_none
)
2930 ru
->m_oper
= op_and
;
2932 map_rule_create (&ma
);
2935 grammar_load_state_destroy (&g
);
2941 map_rule_append (&g
->mapr
, ma
);
2945 if (update_dependencies (g
->di
, g
->mapr
, &g
->syntax_symbol
, &g
->string_symbol
,
2948 grammar_load_state_destroy (&g
);
2952 dict_append (&g_dicts
, g
->di
);
2956 grammar_load_state_destroy (&g
);
2961 int grammar_set_reg8 (grammar id
, const byte
*name
, byte value
)
2964 map_byte
*reg
= NULL
;
2966 clear_last_error ();
2968 dict_find (&g_dicts
, id
, &di
);
2971 set_last_error (INVALID_GRAMMAR_ID
, NULL
, -1);
2975 reg
= map_byte_locate (&di
->m_regbytes
, name
);
2978 set_last_error (INVALID_REGISTER_NAME
, str_duplicate (name
), -1);
2987 internal checking function used by both grammar_check and grammar_fast_check functions
2989 static int _grammar_check (grammar id
, const byte
*text
, byte
**prod
, unsigned int *size
,
2990 unsigned int estimate_prod_size
, int use_fast_path
)
2993 unsigned int index
= 0;
2995 clear_last_error ();
2997 dict_find (&g_dicts
, id
, &di
);
3000 set_last_error (INVALID_GRAMMAR_ID
, NULL
, -1);
3009 regbyte_ctx
*rbc
= NULL
;
3010 bytepool
*bp
= NULL
;
3013 bytepool_create (&bp
, estimate_prod_size
);
3017 if (fast_match (di
, text
, &index
, di
->m_syntax
, &_P
, bp
, 0, &rbc
) != mr_matched
)
3019 bytepool_destroy (&bp
);
3020 free_regbyte_ctx_stack (rbc
, NULL
);
3024 free_regbyte_ctx_stack (rbc
, NULL
);
3029 bytepool_destroy (&bp
);
3033 regbyte_ctx
*rbc
= NULL
;
3036 barray_create (&ba
);
3040 if (match (di
, text
, &index
, di
->m_syntax
, &ba
, 0, &rbc
) != mr_matched
)
3042 barray_destroy (&ba
);
3043 free_regbyte_ctx_stack (rbc
, NULL
);
3047 free_regbyte_ctx_stack (rbc
, NULL
);
3049 *prod
= (byte
*) mem_alloc (ba
->len
* sizeof (byte
));
3052 barray_destroy (&ba
);
3056 mem_copy (*prod
, ba
->data
, ba
->len
* sizeof (byte
));
3058 barray_destroy (&ba
);
3064 int grammar_check (grammar id
, const byte
*text
, byte
**prod
, unsigned int *size
)
3066 return _grammar_check (id
, text
, prod
, size
, 0, 0);
3069 int grammar_fast_check (grammar id
, const byte
*text
, byte
**prod
, unsigned int *size
,
3070 unsigned int estimate_prod_size
)
3072 return _grammar_check (id
, text
, prod
, size
, estimate_prod_size
, 1);
3075 int grammar_destroy (grammar id
)
3077 dict
**di
= &g_dicts
;
3079 clear_last_error ();
3083 if ((**di
).m_id
== id
)
3087 dict_destroy (&tmp
);
3094 set_last_error (INVALID_GRAMMAR_ID
, NULL
, -1);
3098 static void append_character (const char x
, byte
*text
, int *dots_made
, int *len
, int size
)
3100 if (*dots_made
== 0)
3102 if (*len
< size
- 1)
3110 for (i
= 0; i
< 3; i
++)
3118 void grammar_get_last_error (byte
*text
, unsigned int size
, int *pos
)
3120 int len
= 0, dots_made
= 0;
3121 const byte
*p
= error_message
;
3131 const byte
*r
= error_param
;
3135 append_character (*r
++, text
, &dots_made
, &len
, (int) size
);
3142 append_character (*p
++, text
, &dots_made
, &len
, size
);
3147 *pos
= error_position
;