2 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * syntax parsing engine
31 #ifndef GRAMMAR_PORT_BUILD
32 #error Do not build this file directly, build your grammar_XXX.c instead, which includes this file
42 The task is to check the syntax of an input string. Input string is a stream of ASCII
43 characters terminated with a null-character ('\0'). Checking it using C language is
44 difficult and hard to implement without bugs. It is hard to maintain and make changes when
47 This is because of a high redundancy of the C code. Large blocks of code are duplicated with
48 only small changes. Even use of macros does not solve the problem because macros cannot
49 erase the complexity of the problem.
51 The resolution is to create a new language that will be highly oriented to our task. Once
52 we describe a particular syntax, we are done. We can then focus on the code that implements
53 the language. Its size and complexity are relatively small compared to the code that directly
56 First, we must implement our new language. Here, the language is implemented in C, but it
57 could also be implemented in any other language. The code is listed below. We must take
58 good care that it is bug free. This is simple because the code is simple and clean.
60 Next, we must describe the syntax of our new language in itself. Once created and checked
61 manually that it is correct, we can use it to check other scripts.
63 Note that our new language loading code does not have to check the syntax. It is because we
64 assume that the script describing itself is correct, and other scripts can be syntactically
65 checked by the former script. The loading code must only do semantic checking which leads us to
66 simple resolving references.
71 Here I will describe the syntax of the new language (further called "Synek"). It is mainly a
72 sequence of declarations terminated by a semicolon. The declaration consists of a symbol,
73 which is an identifier, and its definition. A definition is in turn a sequence of specifiers
74 connected with ".and" or ".or" operator. These operators cannot be mixed together in one
75 definition. Specifier can be a symbol, string, character, character range or a special
76 keyword ".true" or ".false".
78 At the very beginning of the script there is a declaration of a root symbol, in the form:
79 .syntax <root_symbol>;
80 The <root_symbol> must be one of the symbols in declaration sequence. The syntax is correct if
81 the root symbol evaluates to true. A symbol evaluates to true if the definition associated with
82 the symbol evaluates to true. Definition evaluation depends on the operator used to connect
83 specifiers in the definition. If ".and" operator is used, definition evaluates to true if and
84 only if all the specifiers evaluate to true. If ".or" operator is used, definition evaluates to
85 true if any of the specifiers evaluates to true. If definition contains only one specifier,
86 it is evaluated as if it was connected with ".true" keyword by ".and" operator.
88 If specifier is a ".true" keyword, it always evaluates to true.
90 If specifier is a ".false" keyword, it always evaluates to false. Specifier evaluates to false
91 when it does not evaluate to true.
93 Character range specifier is in the form:
94 '<first_character>' - '<second_character>'
95 If specifier is a character range, it evaluates to true if character in the stream is greater
96 or equal to <first_character> and less or equal to <second_character>. In that situation
97 the stream pointer is advanced to point to next character in the stream. All C-style escape
98 sequences are supported although trigraph sequences are not. The comparisons are performed
99 on 8-bit unsigned integers.
101 Character specifier is in the form:
103 It evaluates to true if the following character range specifier evaluates to true:
104 '<single_character>' - '<single_character>'
106 String specifier is in the form:
108 Let N be the number of characters in <string>. Let <string>[i] designate i-th character in
109 <string>. Then the string specifier evaluates to true if and only if for i in the range [0, N)
110 the following character specifier evaluates to true:
112 If <string>[i] is a quotation mark, '<string>[i]' is replaced with '\<string>[i]'.
114 Symbol specifier can be optionally preceded by a ".loop" keyword in the form:
116 where <symbol> is defined as follows:
117 <symbol> <definition>; (2)
118 Construction (1) is replaced by the following code:
120 and declaration (2) is replaced by the following:
121 <symbol$1> <symbol$2> .or .true;
122 <symbol$2> <symbol> .and <symbol$1>;
123 <symbol> <definition>;
125 Synek also supports a register mechanism. The user can, in their SYN file, declare a number of
126 registers that can be accessed in the syn body. Each reg has its name and a default value.
127 The register is one byte wide. The C code can change the default value by calling
128 grammar_set_reg8() with grammar id, register name and a new value. As we know, each rule is
129 a sequence of specifiers joined with .and or .or operator. And now each specifier can be
130 prefixed with a condition expression in a form ".if (<reg_name> <operator> <hex_literal>)"
131 where <operator> can be == or !=. If the condition evaluates to false, the specifier
132 evaluates to .false. Otherwise it evaluates to the specifier.
137 Synek supports all escape sequences in character specifiers. The mapping table is listed below.
138 All occurrences of the characters in the first column are replaced with the corresponding
139 character in the second column.
141 Escape sequence Represents
142 ------------------------------------------------------------------------------------------------
150 \' Single quotation mark
151 \" Double quotation mark
153 \? Literal question mark
154 \ooo ASCII character in octal notation
155 \xhhh ASCII character in hexadecimal notation
156 ------------------------------------------------------------------------------------------------
161 Any specifier can be followed by a special construction that is executed when the specifier
162 evaluates to false. The construction is in the form:
164 <ERROR_TEXT> is an identifier declared earlier by error text declaration. The declaration is
166 .errtext <ERROR_TEXT> "<error_desc>"
167 When specifier evaluates to false and this construction is present, parsing is stopped
168 immediately and <error_desc> is returned as a result of parsing. The error position is also
169 returned and it is meant as an offset from the beginning of the stream to the character that
170 was valid so far. Example:
172 (**** syntax script ****)
175 .errtext MISSING_SEMICOLON "missing ';'"
176 program declaration .and .loop space .and ';' .error MISSING_SEMICOLON .and
177 .loop space .and '\0';
178 declaration "declare" .and .loop space .and identifier;
181 (**** sample code ****)
185 In the example above checking the sample code will result in error message "missing ';'" and
186 error position 12. The sample code is not correct. Note the presence of '\0' specifier to
187 assure that there is no code after semicolon - only spaces.
188 <error_desc> can optionally contain identifier surrounded by dollar signs $. In such a case,
189 the identifier and dollar signs are replaced by a string retrieved by invoking symbol with
190 the identifier name. The starting position is the error position. The length of the resulting
191 string is the position after invoking the symbol.
196 Synek not only checks the syntax but it can also produce (emit) bytes associated with specifiers
197 that evaluate to true. That is, every specifier and optional error construction can be followed
198 by a number of emit constructions that are in the form:
200 <parameter> can be a HEX number, identifier, a star * or a dollar $. HEX number is preceded by
201 0x or 0X. If <parameter> is an identifier, it must be earlier declared by emit code declaration
203 .emtcode <identifier> <hex_number>
205 When given specifier evaluates to true, all emits associated with the specifier are output
206 in order they were declared. A star means that last-read character should be output instead
207 of constant value. Example:
209 (**** syntax script ****)
212 .emtcode WORD_FOO 0x01
213 .emtcode WORD_BAR 0x02
214 foobar FOO .emit WORD_FOO .or BAR .emit WORD_BAR .or .true .emit 0x00;
215 FOO "foo" .and SPACE;
216 BAR "bar" .and SPACE;
219 (**** sample text 1 ****)
223 (**** sample text 2 ****)
227 For both samples the result will be one-element array. For first sample text it will be
228 value 1, for second - 0. Note that every text will be accepted because of presence of
229 .true as an alternative.
233 (**** syntax script ****)
236 .emtcode VARIABLE 0x01
237 declaration "declare" .and .loop space .and
238 identifier .emit VARIABLE .and (1)
239 .true .emit 0x00 .and (2)
240 .loop space .and ';';
242 identifier .loop id_char .emit *; (3)
243 id_char 'a'-'z' .or 'A'-'Z' .or '_';
245 (**** sample code ****)
249 In specifier (1) symbol <identifier> is followed by .emit VARIABLE. If it evaluates to
250 true, VARIABLE constant and then production of the symbol is output. Specifier (2) is used
251 to terminate the string with null to signal when the string ends. Specifier (3) outputs
252 all characters that make declared identifier. The result of sample code will be the
254 { 1, 'f', 'u', 'b', 'a', 'r', 0 }
256 If .emit is followed by dollar $, it means that current position should be output. Current
257 position is a 32-bit unsigned integer distance from the very beginning of the parsed string to
258 first character consumed by the specifier associated with the .emit instruction. Current
259 position is stored in the output buffer in Little-Endian convention (the lowest byte comes
263 static void mem_free (void **);
266 internal error messages
268 static const byte
*OUT_OF_MEMORY
= (byte
*) "internal error 1001: out of physical memory";
269 static const byte
*UNRESOLVED_REFERENCE
= (byte
*) "internal error 1002: unresolved reference '$'";
270 static const byte
*INVALID_GRAMMAR_ID
= (byte
*) "internal error 1003: invalid grammar object";
271 static const byte
*INVALID_REGISTER_NAME
= (byte
*) "internal error 1004: invalid register name: '$'";
272 /*static const byte *DUPLICATE_IDENTIFIER = (byte *) "internal error 1005: identifier '$' already defined";*/
273 static const byte
*UNREFERENCED_IDENTIFIER
=(byte
*) "internal error 1006: unreferenced identifier '$'";
275 static const byte
*error_message
= NULL
; /* points to one of the error messages above */
276 static byte
*error_param
= NULL
; /* this is inserted into error_message in place of $ */
277 static int error_position
= -1;
279 static byte
*unknown
= (byte
*) "???";
281 static void clear_last_error (void)
283 /* reset error message */
284 error_message
= NULL
;
286 /* free error parameter - if error_param is a "???" don't free it - it's static */
287 if (error_param
!= unknown
)
288 mem_free ((void **) (void *) &error_param
);
292 /* reset error position */
296 static void set_last_error (const byte
*msg
, byte
*param
, int pos
)
298 /* error message can be set only once */
299 if (error_message
!= NULL
)
301 mem_free ((void **) (void *) ¶m
);
307 /* if param is NULL, set error_param to unknown ("???") */
308 /* note: do not try to strdup the "???" - it may be that we are here because of */
309 /* out of memory error so strdup can fail */
313 error_param
= unknown
;
315 error_position
= pos
;
319 memory management routines
321 static void *mem_alloc (size_t size
)
323 void *ptr
= grammar_alloc_malloc (size
);
325 set_last_error (OUT_OF_MEMORY
, NULL
, -1);
/*
    copies size bytes from src to dst by delegating to grammar_memory_copy(),
    returns whatever grammar_memory_copy() returns
*/
static void *mem_copy (void *dst, const void *src, size_t size)
{
    return grammar_memory_copy (dst, src, size);
}
334 static void mem_free (void **ptr
)
336 grammar_alloc_free (*ptr
);
340 static void *mem_realloc (void *ptr
, size_t old_size
, size_t new_size
)
342 void *ptr2
= grammar_alloc_realloc (ptr
, old_size
, new_size
);
344 set_last_error (OUT_OF_MEMORY
, NULL
, -1);
348 static byte
*str_copy_n (byte
*dst
, const byte
*src
, size_t max_len
)
350 return grammar_string_copy_n (dst
, src
, max_len
);
353 static byte
*str_duplicate (const byte
*str
)
355 byte
*new_str
= grammar_string_duplicate (str
);
357 set_last_error (OUT_OF_MEMORY
, NULL
, -1);
361 static int str_equal (const byte
*str1
, const byte
*str2
)
363 return grammar_string_compare (str1
, str2
) == 0;
366 static int str_equal_n (const byte
*str1
, const byte
*str2
, unsigned int n
)
368 return grammar_string_compare_n (str1
, str2
, n
) == 0;
372 str_length (const byte
*str
)
374 return (int) (grammar_string_length (str
));
380 #define GRAMMAR_IMPLEMENT_LIST_APPEND(_Ty)\
381 static void _Ty##_append (_Ty **x, _Ty *nx) {\
382 while (*x) x = &(**x).next;\
387 string to byte map typedef
389 typedef struct map_byte_
393 struct map_byte_
*next
;
396 static void map_byte_create (map_byte
**ma
)
398 *ma
= (map_byte
*) mem_alloc (sizeof (map_byte
));
407 static void map_byte_destroy (map_byte
**ma
)
411 map_byte_destroy (&(**ma
).next
);
412 mem_free ((void **) &(**ma
).key
);
413 mem_free ((void **) ma
);
417 GRAMMAR_IMPLEMENT_LIST_APPEND(map_byte
)
420 searches the map for the specified key,
421 returns pointer to the element with the specified key if it exists
422 returns NULL otherwise
424 static map_byte
*map_byte_locate (map_byte
**ma
, const byte
*key
)
428 if (str_equal ((**ma
).key
, key
))
434 set_last_error (UNRESOLVED_REFERENCE
, str_duplicate (key
), -1);
439 searches the map for specified key,
440 if the key is matched, *data is filled with data associated with the key,
441 returns 0 if the key is matched,
444 static int map_byte_find (map_byte
**ma
, const byte
*key
, byte
*data
)
446 map_byte
*found
= map_byte_locate (ma
, key
);
458 regbyte context typedef
460 Each regbyte consists of its name and a default value. These are static and created at
461 grammar script compile-time, for example the following line:
462 .regbyte vertex_blend 0x00
463 adds a new regbyte named "vertex_blend" to the static list and initializes it to 0.
464 When the script is executed, this regbyte can be accessed by name for read and write. When a
465 particular regbyte is written, a new regbyte_ctx entry is added to the top of the regbyte_ctx
466 stack. The new entry contains information about which regbyte it references and its new value.
467 When a given regbyte is accessed for read, the stack is searched top-down to find an
468 entry that references the regbyte. The first matching entry is used to return the current
469 value it holds. If no entry is found, the default value is returned.
471 typedef struct regbyte_ctx_
474 byte m_current_value
;
475 struct regbyte_ctx_
*m_prev
;
478 static void regbyte_ctx_create (regbyte_ctx
**re
)
480 *re
= (regbyte_ctx
*) mem_alloc (sizeof (regbyte_ctx
));
483 (**re
).m_regbyte
= NULL
;
484 (**re
).m_prev
= NULL
;
488 static void regbyte_ctx_destroy (regbyte_ctx
**re
)
492 mem_free ((void **) re
);
496 static byte
regbyte_ctx_extract (regbyte_ctx
**re
, map_byte
*reg
)
498 /* first lookup in the register stack */
501 if ((**re
).m_regbyte
== reg
)
502 return (**re
).m_current_value
;
507 /* if not found - return the default value */
514 typedef enum emit_type_
516 et_byte
, /* explicit number */
517 et_stream
, /* eaten character */
518 et_position
/* current position */
522 emit destination typedef
524 typedef enum emit_dest_
526 ed_output
, /* write to the output buffer */
527 ed_regbyte
/* write a particular regbyte */
535 emit_dest m_emit_dest
;
536 emit_type m_emit_type
; /* ed_output */
537 byte m_byte
; /* et_byte */
538 map_byte
*m_regbyte
; /* ed_regbyte */
539 byte
*m_regname
; /* ed_regbyte - temporary */
540 struct emit_
*m_next
;
543 static void emit_create (emit
**em
)
545 *em
= (emit
*) mem_alloc (sizeof (emit
));
548 (**em
).m_emit_dest
= ed_output
;
549 (**em
).m_emit_type
= et_byte
;
550 (**em
).m_byte
= '\0';
551 (**em
).m_regbyte
= NULL
;
552 (**em
).m_regname
= NULL
;
553 (**em
).m_next
= NULL
;
557 static void emit_destroy (emit
**em
)
561 emit_destroy (&(**em
).m_next
);
562 mem_free ((void **) &(**em
).m_regname
);
563 mem_free ((void **) em
);
567 static unsigned int emit_size (emit
*_E
)
573 if (_E
->m_emit_dest
== ed_output
)
575 if (_E
->m_emit_type
== et_position
)
576 n
+= 4; /* position is a 32-bit unsigned integer */
586 static int emit_push (emit
*_E
, byte
*_P
, byte c
, unsigned int _Pos
, regbyte_ctx
**_Ctx
)
590 if (_E
->m_emit_dest
== ed_output
)
592 if (_E
->m_emit_type
== et_byte
)
594 else if (_E
->m_emit_type
== et_stream
)
596 else /* _Em->type == et_position */
598 *_P
++ = (byte
) (_Pos
);
599 *_P
++ = (byte
) (_Pos
>> 8);
600 *_P
++ = (byte
) (_Pos
>> 16);
601 *_P
++ = (byte
) (_Pos
>> 24);
606 regbyte_ctx
*new_rbc
;
607 regbyte_ctx_create (&new_rbc
);
611 new_rbc
->m_prev
= *_Ctx
;
612 new_rbc
->m_regbyte
= _E
->m_regbyte
;
615 if (_E
->m_emit_type
== et_byte
)
616 new_rbc
->m_current_value
= _E
->m_byte
;
617 else if (_E
->m_emit_type
== et_stream
)
618 new_rbc
->m_current_value
= c
;
630 typedef struct error_
634 struct rule_
*m_token
;
637 static void error_create (error
**er
)
639 *er
= (error
*) mem_alloc (sizeof (error
));
642 (**er
).m_text
= NULL
;
643 (**er
).m_token_name
= NULL
;
644 (**er
).m_token
= NULL
;
648 static void error_destroy (error
**er
)
652 mem_free ((void **) &(**er
).m_text
);
653 mem_free ((void **) &(**er
).m_token_name
);
654 mem_free ((void **) er
);
661 error_get_token (error
*, struct dict_
*, const byte
*, int);
664 condition operand type typedef
666 typedef enum cond_oper_type_
668 cot_byte
, /* constant 8-bit unsigned integer */
669 cot_regbyte
/* pointer to byte register containing the current value */
673 condition operand typedef
675 typedef struct cond_oper_
677 cond_oper_type m_type
;
678 byte m_byte
; /* cot_byte */
679 map_byte
*m_regbyte
; /* cot_regbyte */
680 byte
*m_regname
; /* cot_regbyte - temporary */
684 condition type typedef
686 typedef enum cond_type_
698 cond_oper m_operands
[2];
701 static void cond_create (cond
**co
)
703 *co
= (cond
*) mem_alloc (sizeof (cond
));
706 (**co
).m_operands
[0].m_regname
= NULL
;
707 (**co
).m_operands
[1].m_regname
= NULL
;
711 static void cond_destroy (cond
**co
)
715 mem_free ((void **) &(**co
).m_operands
[0].m_regname
);
716 mem_free ((void **) &(**co
).m_operands
[1].m_regname
);
717 mem_free ((void **) co
);
722 specifier type typedef
724 typedef enum spec_type_
741 spec_type m_spec_type
;
742 byte m_byte
[2]; /* st_byte, st_byte_range */
743 byte
*m_string
; /* st_string */
744 struct rule_
*m_rule
; /* st_identifier, st_identifier_loop */
751 static void spec_create (spec
**sp
)
753 *sp
= (spec
*) mem_alloc (sizeof (spec
));
756 (**sp
).m_spec_type
= st_false
;
757 (**sp
).m_byte
[0] = '\0';
758 (**sp
).m_byte
[1] = '\0';
759 (**sp
).m_string
= NULL
;
760 (**sp
).m_rule
= NULL
;
761 (**sp
).m_emits
= NULL
;
762 (**sp
).m_errtext
= NULL
;
763 (**sp
).m_cond
= NULL
;
768 static void spec_destroy (spec
**sp
)
772 spec_destroy (&(**sp
).next
);
773 emit_destroy (&(**sp
).m_emits
);
774 error_destroy (&(**sp
).m_errtext
);
775 mem_free ((void **) &(**sp
).m_string
);
776 cond_destroy (&(**sp
).m_cond
);
777 mem_free ((void **) sp
);
781 GRAMMAR_IMPLEMENT_LIST_APPEND(spec
)
804 static void rule_create (rule
**ru
)
806 *ru
= (rule
*) mem_alloc (sizeof (rule
));
809 (**ru
).m_oper
= op_none
;
810 (**ru
).m_specs
= NULL
;
812 (**ru
).m_referenced
= 0;
816 static void rule_destroy (rule
**ru
)
820 rule_destroy (&(**ru
).next
);
821 spec_destroy (&(**ru
).m_specs
);
822 mem_free ((void **) ru
);
826 GRAMMAR_IMPLEMENT_LIST_APPEND(rule
)
829 returns unique grammar id
831 static grammar
next_valid_grammar_id (void)
833 static grammar id
= 0;
846 map_byte
*m_regbytes
;
851 static void dict_create (dict
**di
)
853 *di
= (dict
*) mem_alloc (sizeof (dict
));
856 (**di
).m_rulez
= NULL
;
857 (**di
).m_syntax
= NULL
;
858 (**di
).m_string
= NULL
;
859 (**di
).m_regbytes
= NULL
;
860 (**di
).m_id
= next_valid_grammar_id ();
865 static void dict_destroy (dict
**di
)
869 rule_destroy (&(**di
).m_rulez
);
870 map_byte_destroy (&(**di
).m_regbytes
);
871 mem_free ((void **) di
);
875 GRAMMAR_IMPLEMENT_LIST_APPEND(dict
)
877 static void dict_find (dict
**di
, grammar key
, dict
**data
)
881 if ((**di
).m_id
== key
)
893 static dict
*g_dicts
= NULL
;
898 typedef struct barray_
904 static void barray_create (barray
**ba
)
906 *ba
= (barray
*) mem_alloc (sizeof (barray
));
914 static void barray_destroy (barray
**ba
)
918 mem_free ((void **) &(**ba
).data
);
919 mem_free ((void **) ba
);
924 reallocates byte array to requested size,
925 returns 0 on success,
928 static int barray_resize (barray
**ba
, unsigned int nlen
)
934 mem_free ((void **) &(**ba
).data
);
942 new_pointer
= (byte
*) mem_realloc ((**ba
).data
, (**ba
).len
* sizeof (byte
),
943 nlen
* sizeof (byte
));
946 (**ba
).data
= new_pointer
;
957 adds byte array pointed by *nb to the end of array pointed by *ba,
958 returns 0 on success,
961 static int barray_append (barray
**ba
, barray
**nb
)
963 const unsigned int len
= (**ba
).len
;
965 if (barray_resize (ba
, (**ba
).len
+ (**nb
).len
))
968 mem_copy ((**ba
).data
+ len
, (**nb
).data
, (**nb
).len
);
974 adds emit chain pointed by em to the end of array pointed by *ba,
975 returns 0 on success,
978 static int barray_push (barray
**ba
, emit
*em
, byte c
, unsigned int pos
, regbyte_ctx
**rbc
)
980 unsigned int count
= emit_size (em
);
982 if (barray_resize (ba
, (**ba
).len
+ count
))
985 return emit_push (em
, (**ba
).data
+ ((**ba
).len
- count
), c
, pos
, rbc
);
991 typedef struct bytepool_
997 static void bytepool_destroy (bytepool
**by
)
1001 mem_free ((void **) &(**by
)._F
);
1002 mem_free ((void **) by
);
1006 static void bytepool_create (bytepool
**by
, int len
)
1008 *by
= (bytepool
*) (mem_alloc (sizeof (bytepool
)));
1011 (**by
)._F
= (byte
*) (mem_alloc (sizeof (byte
) * len
));
1014 if ((**by
)._F
== NULL
)
1015 bytepool_destroy (by
);
1019 static int bytepool_reserve (bytepool
*by
, unsigned int n
)
1026 /* byte pool can only grow and at least by doubling its size */
1027 n
= n
>= by
->_Siz
* 2 ? n
: by
->_Siz
* 2;
1029 /* reallocate the memory and adjust pointers to the new memory location */
1030 _P
= (byte
*) (mem_realloc (by
->_F
, sizeof (byte
) * by
->_Siz
, sizeof (byte
) * n
));
1042 string to string map typedef
1044 typedef struct map_str_
1048 struct map_str_
*next
;
1051 static void map_str_create (map_str
**ma
)
1053 *ma
= (map_str
*) mem_alloc (sizeof (map_str
));
1062 static void map_str_destroy (map_str
**ma
)
1066 map_str_destroy (&(**ma
).next
);
1067 mem_free ((void **) &(**ma
).key
);
1068 mem_free ((void **) &(**ma
).data
);
1069 mem_free ((void **) ma
);
1073 GRAMMAR_IMPLEMENT_LIST_APPEND(map_str
)
1076 searches the map for specified key,
1077 if the key is matched, *data is filled with data associated with the key,
1078 returns 0 if the key is matched,
1081 static int map_str_find (map_str
**ma
, const byte
*key
, byte
**data
)
1085 if (str_equal ((**ma
).key
, key
))
1087 *data
= str_duplicate ((**ma
).data
);
1097 set_last_error (UNRESOLVED_REFERENCE
, str_duplicate (key
), -1);
1102 string to rule map typedef
1104 typedef struct map_rule_
1108 struct map_rule_
*next
;
1111 static void map_rule_create (map_rule
**ma
)
1113 *ma
= (map_rule
*) mem_alloc (sizeof (map_rule
));
1122 static void map_rule_destroy (map_rule
**ma
)
1126 map_rule_destroy (&(**ma
).next
);
1127 mem_free ((void **) &(**ma
).key
);
1128 mem_free ((void **) ma
);
1132 GRAMMAR_IMPLEMENT_LIST_APPEND(map_rule
)
1135 searches the map for specified key,
1136 if the key is matched, *data is filled with data associated with the key,
1137 returns 0 if the key is matched,
1140 static int map_rule_find (map_rule
**ma
, const byte
*key
, rule
**data
)
1144 if (str_equal ((**ma
).key
, key
))
1146 *data
= (**ma
).data
;
1154 set_last_error (UNRESOLVED_REFERENCE
, str_duplicate (key
), -1);
1159 returns 1 if given character is a white space,
1162 static int is_space (byte c
)
1164 return c
== ' ' || c
== '\t' || c
== '\n' || c
== '\r';
1168 advances text pointer by 1 if character pointed by *text is a space,
1169 returns 1 if a space has been eaten,
1172 static int eat_space (const byte
**text
)
1174 if (is_space (**text
))
1185 returns 1 if text points to C-style comment start string,
1188 static int is_comment_start (const byte
*text
)
1190 return text
[0] == '/' && text
[1] == '*';
1194 advances text pointer to first character after C-style comment block - if any,
1195 returns 1 if C-style comment block has been encountered and eaten,
1198 static int eat_comment (const byte
**text
)
1200 if (is_comment_start (*text
))
1202 /* *text points to comment block - skip two characters to enter comment body */
1204 /* skip any character except consecutive '*' and '/' */
1205 while (!((*text
)[0] == '*' && (*text
)[1] == '/'))
1207 /* skip those two terminating characters */
1217 advances text pointer to first character that is neither space nor C-style comment block
1219 static void eat_spaces (const byte
**text
)
1221 while (eat_space (text
) || eat_comment (text
))
1226 resizes string pointed by *ptr to successfully add character c to the end of the string,
1227 returns 0 on success,
1230 static int string_grow (byte
**ptr
, unsigned int *len
, byte c
)
1232 /* reallocate the string in 16-byte increments */
1233 if ((*len
& 0x0F) == 0x0F || *ptr
== NULL
)
1235 byte
*tmp
= (byte
*) mem_realloc (*ptr
, ((*len
+ 1) & ~0x0F) * sizeof (byte
),
1236 ((*len
+ 1 + 0x10) & ~0x0F) * sizeof (byte
));
1245 /* append given character */
1249 (*ptr
)[*len
] = '\0';
1255 returns 1 if given character is a valid identifier character a-z, A-Z, 0-9 or _
1258 static int is_identifier (byte c
)
1260 return (c
>= 'a' && c
<= 'z') || (c
>= 'A' && c
<= 'Z') || (c
>= '0' && c
<= '9') || c
== '_';
1264 copies characters from *text to *id until non-identifier character is encountered,
1265 assumes that *id points to NULL object - caller is responsible for later freeing the string,
1266 text pointer is advanced to point past the copied identifier,
1267 returns 0 if identifier was successfully copied,
1270 static int get_identifier (const byte
**text
, byte
**id
)
1272 const byte
*t
= *text
;
1274 unsigned int len
= 0;
1276 if (string_grow (&p
, &len
, '\0'))
1279 /* loop while next character in buffer is valid for identifiers */
1280 while (is_identifier (*t
))
1282 if (string_grow (&p
, &len
, *t
++))
1284 mem_free ((void **) (void *) &p
);
1296 converts sequence of DEC digits pointed by *text until non-DEC digit is encountered,
1297 advances text pointer past the converted sequence,
1298 returns the converted value
1300 static unsigned int dec_convert (const byte
**text
)
1302 unsigned int value
= 0;
1304 while (**text
>= '0' && **text
<= '9')
1306 value
= value
* 10 + **text
- '0';
1314 returns 1 if given character is HEX digit 0-9, A-F or a-f,
1317 static int is_hex (byte c
)
1319 return (c
>= '0' && c
<= '9') || (c
>= 'A' && c
<= 'F') || (c
>= 'a' && c
<= 'f');
1323 returns value of passed character as if it was HEX digit
1325 static unsigned int hex2dec (byte c
)
1327 if (c
>= '0' && c
<= '9')
1329 if (c
>= 'A' && c
<= 'F')
1330 return c
- 'A' + 10;
1331 return c
- 'a' + 10;
1335 converts sequence of HEX digits pointed by *text until non-HEX digit is encountered,
1336 advances text pointer past the converted sequence,
1337 returns the converted value
1339 static unsigned int hex_convert (const byte
**text
)
1341 unsigned int value
= 0;
1343 while (is_hex (**text
))
1345 value
= value
* 0x10 + hex2dec (**text
);
1353 returns 1 if given character is OCT digit 0-7,
1356 static int is_oct (byte c
)
1358 return c
>= '0' && c
<= '7';
1362 returns value of passed character as if it was OCT digit
1364 static int oct2dec (byte c
)
1369 static byte
get_escape_sequence (const byte
**text
)
1373 /* skip '\' character */
1401 return (byte
) hex_convert (text
);
1405 if (is_oct (**text
))
1407 value
= oct2dec (*(*text
)++);
1408 if (is_oct (**text
))
1410 value
= value
* 010 + oct2dec (*(*text
)++);
1411 if (is_oct (**text
))
1412 value
= value
* 010 + oct2dec (*(*text
)++);
1416 return (byte
) value
;
1420 copies characters from *text to *str until " or ' character is encountered,
1421 assumes that *str points to NULL object - caller is responsible for later freeing the string,
1422 assumes that *text points to " or ' character that starts the string,
1423 text pointer is advanced to point past the " or ' character,
1424 returns 0 if string was successfully copied,
1427 static int get_string (const byte
**text
, byte
**str
)
1429 const byte
*t
= *text
;
1431 unsigned int len
= 0;
1434 if (string_grow (&p
, &len
, '\0'))
1437 /* read " or ' character that starts the string */
1439 /* while next character is not the terminating character */
1440 while (*t
&& *t
!= term_char
)
1445 c
= get_escape_sequence (&t
);
1449 if (string_grow (&p
, &len
, c
))
1451 mem_free ((void **) (void *) &p
);
1455 /* skip " or ' character that ends the string */
1464 gets emit code, the syntax is:
1465 ".emtcode" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
1466 assumes that *text already points to <symbol>,
1467 returns 0 if emit code is successfully read,
1470 static int get_emtcode (const byte
**text
, map_byte
**ma
)
1472 const byte
*t
= *text
;
1475 map_byte_create (&m
);
1479 if (get_identifier (&t
, &m
->key
))
1481 map_byte_destroy (&m
);
1490 if (get_string (&t
, &c
))
1492 map_byte_destroy (&m
);
1496 m
->data
= (byte
) c
[0];
1497 mem_free ((void **) (void *) &c
);
1499 else if (t
[0] == '0' && (t
[1] == 'x' || t
[1] == 'X'))
1501 /* skip HEX "0x" or "0X" prefix */
1503 m
->data
= (byte
) hex_convert (&t
);
1507 m
->data
= (byte
) dec_convert (&t
);
1518 gets regbyte declaration, the syntax is:
1519 ".regbyte" " " <symbol> " " (("0x" | "0X") <hex_value>) | <dec_value> | <character>
1520 assumes that *text already points to <symbol>,
1521 returns 0 if regbyte is successfully read,
1524 static int get_regbyte (const byte
**text
, map_byte
**ma
)
1526 /* pass it to the emtcode parser as it has the same syntax starting at <symbol> */
1527 return get_emtcode (text
, ma
);
1531 returns 0 on success,
1534 static int get_errtext (const byte
**text
, map_str
**ma
)
1536 const byte
*t
= *text
;
1539 map_str_create (&m
);
1543 if (get_identifier (&t
, &m
->key
))
1545 map_str_destroy (&m
);
1550 if (get_string (&t
, &m
->data
))
1552 map_str_destroy (&m
);
1563 returns 0 on success,
1564 returns 1 otherwise,
/*
 * get_error: reads an optional error clause starting at *text.
 * Steps visible in this listing:
 *   - an identifier is read into `temp` and compared with "error"; `temp` is freed
 *     on both outcomes;
 *   - the message goes into (**er).m_text either directly from a quoted string
 *     (get_string) or by looking an identifier up in `maps` (map_str_find);
 *   - the message is then copied into `processed`: "$$" becomes a single '$',
 *     ordinary characters are copied as they are, and for a lone '$' a '$' is
 *     appended and the characters up to the next '$' are collected into
 *     (**er).m_token_name;
 *   - finally the old m_text is freed and replaced by `processed`.
 * Every failing string_grow call is followed by freeing `processed`.
 * NOTE(review): branch structure, index updates and return statements are missing
 * from this listing -- confirm against the complete file.
 */
1566 static int get_error (const byte
**text
, error
**er
, map_str
*maps
)
1568 const byte
*t
= *text
;
1575 if (get_identifier (&t
, &temp
))
1579 if (!str_equal ((byte
*) "error", temp
))
1581 mem_free ((void **) (void *) &temp
);
1585 mem_free ((void **) (void *) &temp
);
/* message given inline as a quoted string */
1593 if (get_string (&t
, &(**er
).m_text
))
/* message given by name: resolve it through the errtext map */
1602 if (get_identifier (&t
, &temp
))
1609 if (map_str_find (&maps
, temp
, &(**er
).m_text
))
1611 mem_free ((void **) (void *) &temp
);
1616 mem_free ((void **) (void *) &temp
);
1619 /* try to extract "token" from "...$token$..." */
1621 byte
*processed
= NULL
;
1622 unsigned int len
= 0;
/* start `processed` as an empty, terminated string */
1625 if (string_grow (&processed
, &len
, '\0'))
1631 while (i
< str_length ((**er
).m_text
))
1633 /* check if the dollar sign is repeated - if so skip it */
1634 if ((**er
).m_text
[i
] == '$' && (**er
).m_text
[i
+ 1] == '$')
1636 if (string_grow (&processed
, &len
, '$'))
1638 mem_free ((void **) (void *) &processed
);
1645 else if ((**er
).m_text
[i
] != '$')
1647 if (string_grow (&processed
, &len
, (**er
).m_text
[i
]))
1649 mem_free ((void **) (void *) &processed
);
1658 if (string_grow (&processed
, &len
, '$'))
1660 mem_free ((void **) (void *) &processed
);
1666 /* length of token being extracted */
1667 unsigned int tlen
= 0;
1669 if (string_grow (&(**er
).m_token_name
, &tlen
, '\0'))
1671 mem_free ((void **) (void *) &processed
);
1676 /* skip the dollar sign */
1679 while ((**er
).m_text
[i
] != '$')
1681 if (string_grow (&(**er
).m_token_name
, &tlen
, (**er
).m_text
[i
]))
1683 mem_free ((void **) (void *) &processed
);
1691 /* skip the dollar sign */
/* swap the raw message for the processed copy */
1697 mem_free ((void **) &(**er
).m_text
);
1698 (**er
).m_text
= processed
;
1706 returns 0 on success,
1707 returns 1 otherwise,
/*
 * get_emits: reads a chain of emit/load instructions starting at *text.
 * Steps visible in this listing:
 *   - an identifier is read and compared with "emit" and "load"; `temp` is freed
 *     afterwards;
 *   - the chosen destination is stored in e->m_emit_dest; when it is ed_regbyte the
 *     register name is read into e->m_regname;
 *   - the value is one of: a "0x"/"0X" hex literal (hex_convert), a decimal literal
 *     (dec_convert), a quoted character (first byte of get_string's result) or an
 *     identifier resolved through `mapb` (map_byte_find) -- all stored with type
 *     et_byte -- or one of two forms stored as et_stream and et_position;
 *   - the next instruction is read by a recursive call into e->m_next.
 * NOTE(review): creation of `e`, the tests selecting et_stream/et_position, the
 * returns and the updates of *em / *text are missing from this listing.
 */
1709 static int get_emits (const byte
**text
, emit
**em
, map_byte
*mapb
)
1711 const byte
*t
= *text
;
1720 if (get_identifier (&t
, &temp
))
1725 if (str_equal ((byte
*) "emit", temp
))
1728 else if (str_equal ((byte
*) "load", temp
))
1732 mem_free ((void **) (void *) &temp
);
1736 mem_free ((void **) (void *) &temp
);
1742 e
->m_emit_dest
= dest
;
/* a register destination is followed by the register's name */
1744 if (dest
== ed_regbyte
)
1746 if (get_identifier (&t
, &e
->m_regname
))
/* hex literal */
1755 if (*t
== '0' && (t
[1] == 'x' || t
[1] == 'X'))
1758 e
->m_byte
= (byte
) hex_convert (&t
);
1760 e
->m_emit_type
= et_byte
;
/* decimal literal */
1763 else if (*t
>= '0' && *t
<= '9')
1765 e
->m_byte
= (byte
) dec_convert (&t
);
1767 e
->m_emit_type
= et_byte
;
1774 e
->m_emit_type
= et_stream
;
1781 e
->m_emit_type
= et_position
;
/* quoted character */
1784 else if (*t
== '\'')
1786 if (get_string (&t
, &temp
))
1791 e
->m_byte
= (byte
) temp
[0];
1793 mem_free ((void **) (void *) &temp
);
1795 e
->m_emit_type
= et_byte
;
/* named emit code: resolve the identifier through the byte map */
1799 if (get_identifier (&t
, &temp
))
1805 if (map_byte_find (&mapb
, temp
, &e
->m_byte
))
1807 mem_free ((void **) (void *) &temp
);
1812 mem_free ((void **) (void *) &temp
);
1814 e
->m_emit_type
= et_byte
;
/* read any following emit/load instruction into the chain */
1819 if (get_emits (&t
, &e
->m_next
, mapb
))
1831 returns 0 on success,
1832 returns 1 otherwise,
/*
 * get_spec: reads one specifier of a rule starting at *text.
 * Steps visible in this listing:
 *   - optional ".if" condition: a keyword is read through a second cursor `u`; for
 *     "if" a cond is created in s->m_cond, the left operand is a register name
 *     (type cot_regbyte), the operator is recorded as ct_not_equal or ct_equal and
 *     the right operand is a byte read with hex_convert or dec_convert (cot_byte);
 *   - the specifier itself: a quoted character stored as st_byte, or two quoted
 *     characters separated by '-' stored as st_byte_range in s->m_byte[0..1]; a
 *     string stored as st_string; one of the keywords "true", "false", "debug",
 *     "loop" (the latter followed by an identifier, st_identifier_loop); or a plain
 *     identifier (st_identifier);
 *   - an error clause (get_error) and emit chain (get_emits) are read afterwards.
 * NOTE(review): creation of `s`, the character tests that select each form, the
 * returns and the updates of *sp / *text are missing from this listing.
 */
1834 static int get_spec (const byte
**text
, spec
**sp
, map_str
*maps
, map_byte
*mapb
)
1836 const byte
*t
= *text
;
1843 /* first - read optional .if statement */
1847 byte
*keyword
= NULL
;
1852 if (get_identifier (&u
, &keyword
))
1859 if (str_equal ((byte
*) "if", keyword
))
1861 cond_create (&s
->m_cond
);
1862 if (s
->m_cond
== NULL
)
1868 /* skip the left paren */
1872 /* get the left operand */
1874 if (get_identifier (&u
, &s
->m_cond
->m_operands
[0].m_regname
))
1879 s
->m_cond
->m_operands
[0].m_type
= cot_regbyte
;
1881 /* get the operator (!= or ==) */
1884 s
->m_cond
->m_type
= ct_not_equal
;
1886 s
->m_cond
->m_type
= ct_equal
;
1890 if (u
[0] == '0' && (u
[1] == 'x' || u
[1] == 'X'))
1892 /* skip the 0x prefix */
1895 /* get the right operand */
1896 s
->m_cond
->m_operands
[1].m_byte
= hex_convert (&u
);
1897 s
->m_cond
->m_operands
[1].m_type
= cot_byte
;
1899 else /*if (*u >= '0' && *u <= '9')*/
1901 /* get the right operand */
1902 s
->m_cond
->m_operands
[1].m_byte
= dec_convert (&u
);
1903 s
->m_cond
->m_operands
[1].m_type
= cot_byte
;
1906 /* skip the right paren */
1915 mem_free ((void **) (void *) &keyword
);
/* quoted character, possibly the lower bound of a range */
1922 if (get_string (&t
, &temp
))
1933 /* skip the '-' character */
1937 if (get_string (&t
, &temp2
))
1939 mem_free ((void **) (void *) &temp
);
1945 s
->m_spec_type
= st_byte_range
;
1946 s
->m_byte
[0] = *temp
;
1947 s
->m_byte
[1] = *temp2
;
1949 mem_free ((void **) (void *) &temp2
);
1953 s
->m_spec_type
= st_byte
;
1957 mem_free ((void **) (void *) &temp
);
/* string specifier */
1961 if (get_string (&t
, &s
->m_string
))
1968 s
->m_spec_type
= st_string
;
/* keyword specifier: true / false / debug / loop */
1972 byte
*keyword
= NULL
;
1977 if (get_identifier (&t
, &keyword
))
1985 if (str_equal ((byte
*) "true", keyword
))
1987 s
->m_spec_type
= st_true
;
1990 else if (str_equal ((byte
*) "false", keyword
))
1992 s
->m_spec_type
= st_false
;
1995 else if (str_equal ((byte
*) "debug", keyword
))
1997 s
->m_spec_type
= st_debug
;
2000 else if (str_equal ((byte
*) "loop", keyword
))
2002 if (get_identifier (&t
, &s
->m_string
))
2004 mem_free ((void **) (void *) &keyword
);
2010 s
->m_spec_type
= st_identifier_loop
;
2012 mem_free ((void **) (void *) &keyword
);
/* plain identifier: a reference to another rule by name */
2016 if (get_identifier (&t
, &s
->m_string
))
2023 s
->m_spec_type
= st_identifier
;
2026 if (get_error (&t
, &s
->m_errtext
, maps
))
2032 if (get_emits (&t
, &s
->m_emits
, mapb
))
2044 returns 0 on success,
2045 returns 1 otherwise,
/*
 * get_rule: reads one rule definition starting at *text.
 * The first specifier is read straight into r->m_specs.  Each following specifier
 * is introduced by a dot and an "and"/"or" keyword; the keyword is read into `op`,
 * examined while r->m_oper is still op_none, freed, and the specifier that follows
 * is read into `sp` and appended with spec_append.  The rule ends with a semicolon.
 * NOTE(review): creation of `r`, the loop condition, the assignment of r->m_oper,
 * the returns and the updates of *ru / *text are missing from this listing.
 */
2047 static int get_rule (const byte
**text
, rule
**ru
, map_str
*maps
, map_byte
*mapb
)
2049 const byte
*t
= *text
;
2056 if (get_spec (&t
, &r
->m_specs
, maps
, mapb
))
2067 /* skip the dot that precedes "and" or "or" */
2070 /* read "and" or "or" keyword */
2071 if (get_identifier (&t
, &op
))
/* the operator is only decided while none has been recorded yet */
2078 if (r
->m_oper
== op_none
)
2081 if (str_equal ((byte
*) "and", op
))
2088 mem_free ((void **) (void *) &op
);
2090 if (get_spec (&t
, &sp
, maps
, mapb
))
2096 spec_append (&r
->m_specs
, sp
);
2099 /* skip the semicolon */
2109 returns 0 on success,
2110 returns 1 otherwise,
2112 static int update_dependency (map_rule
*mapr
, byte
*symbol
, rule
**ru
)
2114 if (map_rule_find (&mapr
, symbol
, ru
))
2117 (**ru
).m_referenced
= 1;
2123 returns 0 on success,
2124 returns 1 otherwise,
/*
 * update_dependencies: resolves every by-name reference recorded while loading a
 * grammar.  Steps visible in this listing:
 *   - the root symbol and, when present, the string (lexer) symbol are resolved
 *     into di->m_syntax / di->m_string; both name strings are then freed;
 *   - for every rule and every specifier: identifier and identifier-loop specifiers
 *     are resolved into sp->m_rule, error texts carrying a token name are resolved
 *     into m_errtext->m_token, condition operands of type cot_regbyte and emits
 *     whose destination is ed_regbyte are bound to their register entry with
 *     map_byte_locate (a NULL result is tested for); each resolved name is freed;
 *   - finally rules whose m_referenced flag is clear are reported through
 *     set_last_error (UNREFERENCED_IDENTIFIER, ...) using the key found in `mapr`.
 * The two map_byte_locate calls take the address of the `regbytes` parameter; the
 * listing had that argument mis-encoded as a registered-trademark sign.
 * NOTE(review): loop headers, NULL guards and return statements are missing from
 * this listing -- confirm against the complete file.
 */
2126 static int update_dependencies (dict
*di
, map_rule
*mapr
, byte
**syntax_symbol
,
2127 byte
**string_symbol
, map_byte
*regbytes
)
2129 rule
*rulez
= di
->m_rulez
;
2131 /* update dependencies for the root and lexer symbols */
2132 if (update_dependency (mapr
, *syntax_symbol
, &di
->m_syntax
) ||
2133 (*string_symbol
!= NULL
&& update_dependency (mapr
, *string_symbol
, &di
->m_string
)))
2136 mem_free ((void **) syntax_symbol
);
2137 mem_free ((void **) string_symbol
);
2139 /* update dependencies for the rest of the rules */
2142 spec
*sp
= rulez
->m_specs
;
2144 /* iterate through all the specifiers */
2147 /* update dependency for identifier */
2148 if (sp
->m_spec_type
== st_identifier
|| sp
->m_spec_type
== st_identifier_loop
)
2150 if (update_dependency (mapr
, sp
->m_string
, &sp
->m_rule
))
2153 mem_free ((void **) &sp
->m_string
);
2156 /* some errtexts refer to a rule */
2157 if (sp
->m_errtext
&& sp
->m_errtext
->m_token_name
)
2159 if (update_dependency (mapr
, sp
->m_errtext
->m_token_name
, &sp
->m_errtext
->m_token
))
2162 mem_free ((void **) &sp
->m_errtext
->m_token_name
);
2165 /* update dependency for condition */
2169 for (i
= 0; i
< 2; i
++)
2170 if (sp
->m_cond
->m_operands
[i
].m_type
== cot_regbyte
)
2172 sp
->m_cond
->m_operands
[i
].m_regbyte
= map_byte_locate (&regbytes
,
2173 sp
->m_cond
->m_operands
[i
].m_regname
);
2175 if (sp
->m_cond
->m_operands
[i
].m_regbyte
== NULL
)
2178 mem_free ((void **) &sp
->m_cond
->m_operands
[i
].m_regname
);
2182 /* update dependency for all .load instructions */
2185 emit
*em
= sp
->m_emits
;
2188 if (em
->m_emit_dest
== ed_regbyte
)
2190 em
->m_regbyte
= map_byte_locate (&regbytes
, em
->m_regname
);
2192 if (em
->m_regbyte
== NULL
)
2195 mem_free ((void **) &em
->m_regname
);
2205 rulez
= rulez
->next
;
2208 /* check for unreferenced symbols */
2209 rulez
= di
->m_rulez
;
2210 while (rulez
!= NULL
)
2212 if (!rulez
->m_referenced
)
2214 map_rule
*ma
= mapr
;
2217 if (ma
->data
== rulez
)
2219 set_last_error (UNREFERENCED_IDENTIFIER
, str_duplicate (ma
->key
), -1);
2225 rulez
= rulez
->next
;
/*
 * satisfies_condition: evaluates a specifier's condition `co` against the register
 * context `ctx`.  For each of the two operands a value is obtained, either directly
 * from m_byte or by reading the operand's register with regbyte_ctx_extract; the
 * result is the equality or the inequality of the two values.
 * NOTE(review): the case labels that pick between the two value sources and the
 * two comparisons, any handling of a NULL `co`, and the fall-through return are
 * missing from this listing -- confirm against the complete file.
 */
2231 static int satisfies_condition (cond
*co
, regbyte_ctx
*ctx
)
2239 for (i
= 0; i
< 2; i
++)
2240 switch (co
->m_operands
[i
].m_type
)
2243 values
[i
] = co
->m_operands
[i
].m_byte
;
2246 values
[i
] = regbyte_ctx_extract (&ctx
, co
->m_operands
[i
].m_regbyte
);
2253 return values
[0] == values
[1];
2255 return values
[0] != values
[1];
2261 static void free_regbyte_ctx_stack (regbyte_ctx
*top
, regbyte_ctx
*limit
)
2263 while (top
!= limit
)
2265 regbyte_ctx
*rbc
= top
->m_prev
;
2266 regbyte_ctx_destroy (&top
);
/* Result codes returned by the matching routines. */
typedef enum match_result_
{
    mr_not_matched,     /* the examined string does not match */
    mr_matched,         /* the examined string matches */
    mr_error_raised,    /* mr_not_matched + error has been raised */
    mr_dont_emit,       /* used by identifier loops only */
    mr_internal_error   /* an internal error has occurred such as out of memory */
} match_result;
2281 * This function does the main job. It parses the text and generates output data.
/*
 * match: matches `text` against rule `ru`, collecting emitted bytes in `ba`.
 * Structure visible in this listing, per specifier of the rule:
 *   - the specifier is skipped unless satisfies_condition accepts its condition;
 *   - rule references recurse into match with a fresh barray `array`;
 *   - strings: outside string filtering and when the dictionary has a string rule
 *     (di->m_string), the text is first run through that rule and the specifier
 *     matches only if the filtered length equals the string length and the
 *     characters compare equal; otherwise the string is compared character by
 *     character;
 *   - single bytes and byte ranges are compared against text[ind];
 *   - identifier loops call match repeatedly, pushing emits and appending the
 *     collected array after every successful iteration;
 *   - mr_error_raised and mr_internal_error are propagated after releasing the
 *     register contexts created here (free_regbyte_ctx_stack (ctx, *rbc)) and the
 *     temporary array;
 *   - under op_and a failing specifier ends the rule, raising the specifier's error
 *     text through set_last_error on one of the paths;
 *   - on a match the specifier's emits are pushed to `ba` and `array` is appended.
 * NOTE(review): the last parameter (`rbc`), the loop header, case labels, index
 * updates and the success returns are missing from this listing -- confirm against
 * the complete file.
 */
2284 match (dict
*di
, const byte
*text
, int *index
, rule
*ru
, barray
**ba
, int filtering_string
,
2288 match_result status
= mr_not_matched
;
2289 spec
*sp
= ru
->m_specs
;
2290 regbyte_ctx
*ctx
= *rbc
;
2292 /* for every specifier in the rule */
2295 int i
, len
, save_ind
= ind
;
2296 barray
*array
= NULL
;
2298 if (satisfies_condition (sp
->m_cond
, ctx
))
2300 switch (sp
->m_spec_type
)
2303 barray_create (&array
);
2306 free_regbyte_ctx_stack (ctx
, *rbc
);
2307 return mr_internal_error
;
2310 status
= match (di
, text
, &ind
, sp
->m_rule
, &array
, filtering_string
, &ctx
);
2312 if (status
== mr_internal_error
)
2314 free_regbyte_ctx_stack (ctx
, *rbc
);
2315 barray_destroy (&array
);
2316 return mr_internal_error
;
2320 len
= str_length (sp
->m_string
);
2322 /* prefilter the stream */
2323 if (!filtering_string
&& di
->m_string
)
2326 int filter_index
= 0;
2327 match_result result
;
2328 regbyte_ctx
*null_ctx
= NULL
;
2330 barray_create (&ba
);
2333 free_regbyte_ctx_stack (ctx
, *rbc
);
2334 return mr_internal_error
;
2337 result
= match (di
, text
+ ind
, &filter_index
, di
->m_string
, &ba
, 1, &null_ctx
);
2339 if (result
== mr_internal_error
)
2341 free_regbyte_ctx_stack (ctx
, *rbc
);
2342 barray_destroy (&ba
);
2343 return mr_internal_error
;
2346 if (result
!= mr_matched
)
2348 barray_destroy (&ba
);
2349 status
= mr_not_matched
;
2353 barray_destroy (&ba
);
2355 if (filter_index
!= len
|| !str_equal_n (sp
->m_string
, text
+ ind
, len
))
2357 status
= mr_not_matched
;
2361 status
= mr_matched
;
2366 status
= mr_matched
;
2367 for (i
= 0; status
== mr_matched
&& i
< len
; i
++)
2368 if (text
[ind
+ i
] != sp
->m_string
[i
])
2369 status
= mr_not_matched
;
2371 if (status
== mr_matched
)
2376 status
= text
[ind
] == *sp
->m_byte
? mr_matched
: mr_not_matched
;
2377 if (status
== mr_matched
)
2381 status
= (text
[ind
] >= sp
->m_byte
[0] && text
[ind
] <= sp
->m_byte
[1]) ?
2382 mr_matched
: mr_not_matched
;
2383 if (status
== mr_matched
)
2387 status
= mr_matched
;
2390 status
= mr_not_matched
;
2393 status
= ru
->m_oper
== op_and
? mr_matched
: mr_not_matched
;
2395 case st_identifier_loop
:
2396 barray_create (&array
);
2399 free_regbyte_ctx_stack (ctx
, *rbc
);
2400 return mr_internal_error
;
2403 status
= mr_dont_emit
;
2406 match_result result
;
2409 result
= match (di
, text
, &ind
, sp
->m_rule
, &array
, filtering_string
, &ctx
);
2411 if (result
== mr_error_raised
)
2416 else if (result
== mr_matched
)
2418 if (barray_push (ba
, sp
->m_emits
, text
[ind
- 1], save_ind
, &ctx
) ||
2419 barray_append (ba
, &array
))
2421 free_regbyte_ctx_stack (ctx
, *rbc
);
2422 barray_destroy (&array
);
2423 return mr_internal_error
;
2425 barray_destroy (&array
);
2426 barray_create (&array
);
2429 free_regbyte_ctx_stack (ctx
, *rbc
);
2430 return mr_internal_error
;
2433 else if (result
== mr_internal_error
)
2435 free_regbyte_ctx_stack (ctx
, *rbc
);
2436 barray_destroy (&array
);
2437 return mr_internal_error
;
2447 status
= mr_not_matched
;
2450 if (status
== mr_error_raised
)
2452 free_regbyte_ctx_stack (ctx
, *rbc
);
2453 barray_destroy (&array
);
2455 return mr_error_raised
;
2458 if (ru
->m_oper
== op_and
&& status
!= mr_matched
&& status
!= mr_dont_emit
)
2460 free_regbyte_ctx_stack (ctx
, *rbc
);
2461 barray_destroy (&array
);
2465 set_last_error (sp
->m_errtext
->m_text
, error_get_token (sp
->m_errtext
, di
, text
,
2468 return mr_error_raised
;
2471 return mr_not_matched
;
2474 if (status
== mr_matched
)
2477 if (barray_push (ba
, sp
->m_emits
, text
[ind
- 1], save_ind
, &ctx
))
2479 free_regbyte_ctx_stack (ctx
, *rbc
);
2480 barray_destroy (&array
);
2481 return mr_internal_error
;
2485 if (barray_append (ba
, &array
))
2487 free_regbyte_ctx_stack (ctx
, *rbc
);
2488 barray_destroy (&array
);
2489 return mr_internal_error
;
2493 barray_destroy (&array
);
2495 /* if the rule operator is a logical or, we pick up the first matching specifier */
2496 if (ru
->m_oper
== op_or
&& (status
== mr_matched
|| status
== mr_dont_emit
))
2506 /* everything went fine - all specifiers match up */
2507 if (ru
->m_oper
== op_and
&& (status
== mr_matched
|| status
== mr_dont_emit
))
/* no specifier matched: drop the contexts created here and report the failure */
2514 free_regbyte_ctx_stack (ctx
, *rbc
);
2515 return mr_not_matched
;
/*
 * fast_match: variant of match that writes emitted bytes into the byte pool `_BP`
 * instead of building barrays.
 * Structure visible in this listing:
 *   - `_P` is the write position taken from *_PP (0 while filtering a string);
 *     for each specifier `_P2` is `_P` plus the size of the specifier's emits
 *     (emit_size) and that much space is reserved with bytepool_reserve;
 *   - specifier handling mirrors match: condition check, recursion for rule
 *     references, string matching with optional prefiltering through
 *     di->m_string (called with a NULL position pointer and filtering_string 1),
 *     byte and byte-range comparison, and identifier loops;
 *   - on a match the specifier's emits are written with emit_push at
 *     `_BP->_F + _P`; inside identifier loops this happens only when not
 *     filtering a string, after which more space is reserved;
 *   - mr_error_raised and mr_internal_error are propagated after
 *     free_regbyte_ctx_stack (ctx, *rbc); under op_and a failing specifier ends
 *     the rule, raising the specifier's error text on one of the paths.
 * NOTE(review): the loop header, case labels, index and position updates and the
 * success returns are missing from this listing -- confirm against the complete
 * file.
 */
2519 fast_match (dict
*di
, const byte
*text
, int *index
, rule
*ru
, int *_PP
, bytepool
*_BP
,
2520 int filtering_string
, regbyte_ctx
**rbc
)
2523 int _P
= filtering_string
? 0 : *_PP
;
2525 match_result status
= mr_not_matched
;
2526 spec
*sp
= ru
->m_specs
;
2527 regbyte_ctx
*ctx
= *rbc
;
2529 /* for every specifier in the rule */
2532 int i
, len
, save_ind
= ind
;
/* make room for this specifier's emits before matching it */
2534 _P2
= _P
+ (sp
->m_emits
? emit_size (sp
->m_emits
) : 0);
2535 if (bytepool_reserve (_BP
, _P2
))
2537 free_regbyte_ctx_stack (ctx
, *rbc
);
2538 return mr_internal_error
;
2541 if (satisfies_condition (sp
->m_cond
, ctx
))
2543 switch (sp
->m_spec_type
)
2546 status
= fast_match (di
, text
, &ind
, sp
->m_rule
, &_P2
, _BP
, filtering_string
, &ctx
);
2548 if (status
== mr_internal_error
)
2550 free_regbyte_ctx_stack (ctx
, *rbc
);
2551 return mr_internal_error
;
2555 len
= str_length (sp
->m_string
);
2557 /* prefilter the stream */
2558 if (!filtering_string
&& di
->m_string
)
2560 int filter_index
= 0;
2561 match_result result
;
2562 regbyte_ctx
*null_ctx
= NULL
;
2564 result
= fast_match (di
, text
+ ind
, &filter_index
, di
->m_string
, NULL
, _BP
, 1, &null_ctx
);
2566 if (result
== mr_internal_error
)
2568 free_regbyte_ctx_stack (ctx
, *rbc
);
2569 return mr_internal_error
;
2572 if (result
!= mr_matched
)
2574 status
= mr_not_matched
;
2578 if (filter_index
!= len
|| !str_equal_n (sp
->m_string
, text
+ ind
, len
))
2580 status
= mr_not_matched
;
2584 status
= mr_matched
;
2589 status
= mr_matched
;
2590 for (i
= 0; status
== mr_matched
&& i
< len
; i
++)
2591 if (text
[ind
+ i
] != sp
->m_string
[i
])
2592 status
= mr_not_matched
;
2594 if (status
== mr_matched
)
2599 status
= text
[ind
] == *sp
->m_byte
? mr_matched
: mr_not_matched
;
2600 if (status
== mr_matched
)
2604 status
= (text
[ind
] >= sp
->m_byte
[0] && text
[ind
] <= sp
->m_byte
[1]) ?
2605 mr_matched
: mr_not_matched
;
2606 if (status
== mr_matched
)
2610 status
= mr_matched
;
2613 status
= mr_not_matched
;
2616 status
= ru
->m_oper
== op_and
? mr_matched
: mr_not_matched
;
2618 case st_identifier_loop
:
2619 status
= mr_dont_emit
;
2622 match_result result
;
2625 result
= fast_match (di
, text
, &ind
, sp
->m_rule
, &_P2
, _BP
, filtering_string
, &ctx
);
2627 if (result
== mr_error_raised
)
2632 else if (result
== mr_matched
)
2634 if (!filtering_string
)
2636 if (sp
->m_emits
!= NULL
)
2638 if (emit_push (sp
->m_emits
, _BP
->_F
+ _P
, text
[ind
- 1], save_ind
, &ctx
))
2640 free_regbyte_ctx_stack (ctx
, *rbc
);
2641 return mr_internal_error
;
2646 _P2
+= sp
->m_emits
? emit_size (sp
->m_emits
) : 0;
2647 if (bytepool_reserve (_BP
, _P2
))
2649 free_regbyte_ctx_stack (ctx
, *rbc
);
2650 return mr_internal_error
;
2654 else if (result
== mr_internal_error
)
2656 free_regbyte_ctx_stack (ctx
, *rbc
);
2657 return mr_internal_error
;
2667 status
= mr_not_matched
;
2670 if (status
== mr_error_raised
)
2672 free_regbyte_ctx_stack (ctx
, *rbc
);
2674 return mr_error_raised
;
2677 if (ru
->m_oper
== op_and
&& status
!= mr_matched
&& status
!= mr_dont_emit
)
2679 free_regbyte_ctx_stack (ctx
, *rbc
);
2683 set_last_error (sp
->m_errtext
->m_text
, error_get_token (sp
->m_errtext
, di
, text
,
2686 return mr_error_raised
;
2689 return mr_not_matched
;
2692 if (status
== mr_matched
)
2694 if (sp
->m_emits
!= NULL
)
2695 if (emit_push (sp
->m_emits
, _BP
->_F
+ _P
, text
[ind
- 1], save_ind
, &ctx
))
2697 free_regbyte_ctx_stack (ctx
, *rbc
);
2698 return mr_internal_error
;
2704 /* if the rule operator is a logical or, we pick up the first matching specifier */
2705 if (ru
->m_oper
== op_or
&& (status
== mr_matched
|| status
== mr_dont_emit
))
2709 if (!filtering_string
)
2717 /* everything went fine - all specifiers match up */
2718 if (ru
->m_oper
== op_and
&& (status
== mr_matched
|| status
== mr_dont_emit
))
2722 if (!filtering_string
)
/* no specifier matched: drop the contexts created here and report the failure */
2727 free_regbyte_ctx_stack (ctx
, *rbc
);
2728 return mr_not_matched
;
/*
 * error_get_token: extracts the text of the token an error message refers to.
 * The rule er->m_token is matched against `text + ind` using a temporary barray;
 * when it matches (together with a further condition that is not visible here),
 * filter_index + 1 bytes are allocated, the first filter_index characters at
 * `text + ind` are copied and the copy is NUL-terminated.  The barray is destroyed
 * afterwards.
 * NOTE(review): the declaration of `str`, the guards on er->m_token and on the
 * allocations, and the return statement are missing from this listing.
 */
2732 error_get_token (error
*er
, dict
*di
, const byte
*text
, int ind
)
2739 int filter_index
= 0;
2740 regbyte_ctx
*ctx
= NULL
;
2742 barray_create (&ba
);
2745 if (match (di
, text
+ ind
, &filter_index
, er
->m_token
, &ba
, 0, &ctx
) == mr_matched
&&
2748 str
= (byte
*) mem_alloc (filter_index
+ 1);
2751 str_copy_n (str
, text
+ ind
, filter_index
);
2752 str
[filter_index
] = '\0';
2755 barray_destroy (&ba
);
2762 typedef struct grammar_load_state_
2765 byte
*syntax_symbol
;
2766 byte
*string_symbol
;
2770 } grammar_load_state
;
2772 static void grammar_load_state_create (grammar_load_state
**gr
)
2774 *gr
= (grammar_load_state
*) mem_alloc (sizeof (grammar_load_state
));
2778 (**gr
).syntax_symbol
= NULL
;
2779 (**gr
).string_symbol
= NULL
;
2786 static void grammar_load_state_destroy (grammar_load_state
**gr
)
2790 dict_destroy (&(**gr
).di
);
2791 mem_free ((void **) &(**gr
).syntax_symbol
);
2792 mem_free ((void **) &(**gr
).string_symbol
);
2793 map_str_destroy (&(**gr
).maps
);
2794 map_byte_destroy (&(**gr
).mapb
);
2795 map_rule_destroy (&(**gr
).mapr
);
2796 mem_free ((void **) gr
);
/*
 * grammar_load_from_text: builds a grammar from its textual description.
 * Steps visible in this listing:
 *   - the last error is cleared, a load state `g` and its dictionary are created;
 *   - after the ".syntax" keyword the root symbol is read into g->syntax_symbol;
 *   - each following statement starts with a symbol; when it was preceded by a dot
 *     it is dispatched on "emtcode" (entry appended to g->mapb), "regbyte" (entry
 *     appended to g->di->m_regbytes), "errtext" (entry appended to g->maps) and
 *     "string" (symbol read into g->string_symbol, after a check on
 *     g->di->m_string); anything else is read as a rule, appended to
 *     g->di->m_rulez, given op_and when it has a single specifier, and registered
 *     in g->mapr;
 *   - update_dependencies resolves all names, the dictionary is appended to
 *     g_dicts and the load state is destroyed.
 * Every failure path shown destroys the load state.
 * NOTE(review): the loop header, whitespace skipping, the filling of the map_rule
 * entry, the detaching of g->di before the final destroy and all return
 * statements are missing from this listing -- confirm against the complete file.
 */
2804 grammar
grammar_load_from_text (const byte
*text
)
2806 grammar_load_state
*g
= NULL
;
2809 clear_last_error ();
2811 grammar_load_state_create (&g
);
2815 dict_create (&g
->di
);
2818 grammar_load_state_destroy (&g
);
2824 /* skip ".syntax" keyword */
2828 /* retrieve root symbol */
2829 if (get_identifier (&text
, &g
->syntax_symbol
))
2831 grammar_load_state_destroy (&g
);
2836 /* skip semicolon */
2842 byte
*symbol
= NULL
;
2843 int is_dot
= *text
== '.';
2848 if (get_identifier (&text
, &symbol
))
2850 grammar_load_state_destroy (&g
);
/* .emtcode - named emit code */
2856 if (is_dot
&& str_equal (symbol
, (byte
*) "emtcode"))
2858 map_byte
*ma
= NULL
;
2860 mem_free ((void **) (void *) &symbol
);
2862 if (get_emtcode (&text
, &ma
))
2864 grammar_load_state_destroy (&g
);
2868 map_byte_append (&g
->mapb
, ma
);
/* .regbyte - register declaration, stored with the dictionary */
2871 else if (is_dot
&& str_equal (symbol
, (byte
*) "regbyte"))
2873 map_byte
*ma
= NULL
;
2875 mem_free ((void **) (void *) &symbol
);
2877 if (get_regbyte (&text
, &ma
))
2879 grammar_load_state_destroy (&g
);
2883 map_byte_append (&g
->di
->m_regbytes
, ma
);
/* .errtext - named error message */
2886 else if (is_dot
&& str_equal (symbol
, (byte
*) "errtext"))
2890 mem_free ((void **) (void *) &symbol
);
2892 if (get_errtext (&text
, &ma
))
2894 grammar_load_state_destroy (&g
);
2898 map_str_append (&g
->maps
, ma
);
/* .string - name of the string rule */
2901 else if (is_dot
&& str_equal (symbol
, (byte
*) "string"))
2903 mem_free ((void **) (void *) &symbol
);
2905 if (g
->di
->m_string
!= NULL
)
2907 grammar_load_state_destroy (&g
);
2911 if (get_identifier (&text
, &g
->string_symbol
))
2913 grammar_load_state_destroy (&g
);
2917 /* skip semicolon */
/* anything else is a rule definition */
2925 map_rule
*ma
= NULL
;
2927 if (get_rule (&text
, &ru
, g
->maps
, g
->mapb
))
2929 grammar_load_state_destroy (&g
);
2933 rule_append (&g
->di
->m_rulez
, ru
);
2935 /* if a rule consist of only one specifier, give it an ".and" operator */
2936 if (ru
->m_oper
== op_none
)
2937 ru
->m_oper
= op_and
;
2939 map_rule_create (&ma
);
2942 grammar_load_state_destroy (&g
);
2948 map_rule_append (&g
->mapr
, ma
);
2952 if (update_dependencies (g
->di
, g
->mapr
, &g
->syntax_symbol
, &g
->string_symbol
,
2955 grammar_load_state_destroy (&g
);
2959 dict_append (&g_dicts
, g
->di
);
2963 grammar_load_state_destroy (&g
);
/*
 * grammar_set_reg8: operates on the register called `name` in grammar `id`.
 * The last error is cleared, the grammar is looked up in g_dicts with dict_find
 * and the register with map_byte_locate in the dictionary's m_regbytes.
 * INVALID_GRAMMAR_ID and INVALID_REGISTER_NAME (with a copy of `name`) are the two
 * errors reported through set_last_error.
 * NOTE(review): the conditions guarding the two error reports, the use of `value`
 * and the return statements are missing from this listing -- confirm against the
 * complete file.
 */
2968 int grammar_set_reg8 (grammar id
, const byte
*name
, byte value
)
2971 map_byte
*reg
= NULL
;
2973 clear_last_error ();
2975 dict_find (&g_dicts
, id
, &di
);
2978 set_last_error (INVALID_GRAMMAR_ID
, NULL
, -1);
2982 reg
= map_byte_locate (&di
->m_regbytes
, name
);
2985 set_last_error (INVALID_REGISTER_NAME
, str_duplicate (name
), -1);
2994 internal checking function used by both grammar_check and grammar_fast_check functions
/*
 * _grammar_check: shared implementation behind grammar_check and
 * grammar_fast_check.
 * The last error is cleared and grammar `id` is looked up in g_dicts;
 * INVALID_GRAMMAR_ID is the error reported for the lookup.  Two paths follow:
 *   - fast path: a byte pool sized by `estimate_prod_size` is created and
 *     fast_match runs from the root rule di->m_syntax; when the result is not
 *     mr_matched the pool and the register contexts are released;
 *   - regular path: a barray is created and match runs from di->m_syntax; on
 *     success ba->len bytes are allocated into *prod and ba->data is copied
 *     there; the barray is destroyed on every path shown.
 * NOTE(review): the test of `use_fast_path`, the assignments to *size, the
 * transfer of the pool's buffer to *prod and the return statements are missing
 * from this listing -- confirm against the complete file.
 */
2996 static int _grammar_check (grammar id
, const byte
*text
, byte
**prod
, unsigned int *size
,
2997 unsigned int estimate_prod_size
, int use_fast_path
)
3002 clear_last_error ();
3004 dict_find (&g_dicts
, id
, &di
);
3007 set_last_error (INVALID_GRAMMAR_ID
, NULL
, -1);
3016 regbyte_ctx
*rbc
= NULL
;
3017 bytepool
*bp
= NULL
;
3020 bytepool_create (&bp
, estimate_prod_size
);
3024 if (fast_match (di
, text
, &index
, di
->m_syntax
, &_P
, bp
, 0, &rbc
) != mr_matched
)
3026 bytepool_destroy (&bp
);
3027 free_regbyte_ctx_stack (rbc
, NULL
);
3031 free_regbyte_ctx_stack (rbc
, NULL
);
3036 bytepool_destroy (&bp
);
3040 regbyte_ctx
*rbc
= NULL
;
3043 barray_create (&ba
);
3047 if (match (di
, text
, &index
, di
->m_syntax
, &ba
, 0, &rbc
) != mr_matched
)
3049 barray_destroy (&ba
);
3050 free_regbyte_ctx_stack (rbc
, NULL
);
3054 free_regbyte_ctx_stack (rbc
, NULL
);
/* hand the production over to the caller in a buffer of its own */
3056 *prod
= (byte
*) mem_alloc (ba
->len
* sizeof (byte
));
3059 barray_destroy (&ba
);
3063 mem_copy (*prod
, ba
->data
, ba
->len
* sizeof (byte
));
3065 barray_destroy (&ba
);
3071 int grammar_check (grammar id
, const byte
*text
, byte
**prod
, unsigned int *size
)
3073 return _grammar_check (id
, text
, prod
, size
, 0, 0);
3076 int grammar_fast_check (grammar id
, const byte
*text
, byte
**prod
, unsigned int *size
,
3077 unsigned int estimate_prod_size
)
3079 return _grammar_check (id
, text
, prod
, size
, estimate_prod_size
, 1);
/*
 * grammar_destroy: destroys the grammar whose id is `id`.
 * The last error is cleared, then g_dicts is walked through a pointer-to-pointer
 * `di`; the entry whose m_id equals `id` is destroyed with dict_destroy.
 * INVALID_GRAMMAR_ID is reported through set_last_error.
 * NOTE(review): the loop header, the unlinking of the entry before it is
 * destroyed, the advance of `di` and the return values are missing from this
 * listing -- confirm against the complete file.
 */
3082 int grammar_destroy (grammar id
)
3084 dict
**di
= &g_dicts
;
3086 clear_last_error ();
3090 if ((**di
).m_id
== id
)
3094 dict_destroy (&tmp
);
3101 set_last_error (INVALID_GRAMMAR_ID
, NULL
, -1);
/*
 * append_character: helper of grammar_get_last_error that adds character `x` to
 * the output buffer `text`, whose capacity is `size`, with *len tracking the
 * current length.  It acts only while *dots_made is 0 and distinguishes the case
 * *len < size - 1 from the case where the buffer is full; the latter involves a
 * loop of three iterations.
 * NOTE(review): the statements that store the character, that run inside the
 * three-iteration loop and that set *dots_made are missing from this listing;
 * presumably the loop writes a trailing "..." -- confirm against the complete
 * file.
 */
3105 static void append_character (const char x
, byte
*text
, int *dots_made
, int *len
, int size
)
3107 if (*dots_made
== 0)
3109 if (*len
< size
- 1)
3117 for (i
= 0; i
< 3; i
++)
/*
 * grammar_get_last_error: writes the last error message into `text` (capacity
 * `size`) and stores error_position in *pos.
 * The message template error_message is walked through `p`; characters are added
 * with append_character, and on one path the characters of error_param (walked
 * through `r`) are added instead.
 * NOTE(review): the loop headers and the test that switches to error_param are
 * missing from this listing -- presumably a '$' placeholder in the template;
 * confirm against the complete file.
 * NOTE(review): `size` is cast to int in the first append_character call but not
 * in the second; append_character declares the parameter as int.
 */
3125 void grammar_get_last_error (byte
*text
, unsigned int size
, int *pos
)
3127 int len
= 0, dots_made
= 0;
3128 const byte
*p
= error_message
;
3138 const byte
*r
= error_param
;
3142 append_character (*r
++, text
, &dots_made
, &len
, (int) size
);
3149 append_character (*p
++, text
, &dots_made
, &len
, size
);
3154 *pos
= error_position
;