[multiple changes]
[gcc.git] / gcc / ada / s-regexp.adb
1 ------------------------------------------------------------------------------
2 -- --
3 -- GNAT COMPILER COMPONENTS --
4 -- --
5 -- S Y S T E M . R E G E X P --
6 -- --
7 -- B o d y --
8 -- --
9 -- Copyright (C) 1999-2008, AdaCore --
10 -- --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, --
20 -- Boston, MA 02110-1301, USA. --
21 -- --
22 -- As a special exception, if other files instantiate generics from this --
23 -- unit, or you link this unit with other files to produce an executable, --
24 -- this unit does not by itself cause the resulting executable to be --
25 -- covered by the GNU General Public License. This exception does not --
26 -- however invalidate any other reasons why the executable file might be --
27 -- covered by the GNU Public License. --
28 -- --
29 -- GNAT was originally developed by the GNAT team at New York University. --
30 -- Extensive contributions were provided by Ada Core Technologies Inc. --
31 -- --
32 ------------------------------------------------------------------------------
33
34 with Ada.Unchecked_Deallocation;
35
36 with System.Case_Util;
37
38 package body System.Regexp is
39
40 Open_Paren : constant Character := '(';
41 Close_Paren : constant Character := ')';
42 Open_Bracket : constant Character := '[';
43 Close_Bracket : constant Character := ']';
44
45 type State_Index is new Natural;
46 type Column_Index is new Natural;
47
48 type Regexp_Array is array
49 (State_Index range <>, Column_Index range <>) of State_Index;
50 -- First index is for the state number
51 -- Second index is for the character type
52 -- Contents is the new State
53
54 type Regexp_Array_Access is access Regexp_Array;
55 -- Use this type through the functions Set below, so that it
56 -- can grow dynamically depending on the needs.
57
58 type Mapping is array (Character'Range) of Column_Index;
59 -- Mapping between characters and column in the Regexp_Array
60
61 type Boolean_Array is array (State_Index range <>) of Boolean;
62
63 type Regexp_Value
64 (Alphabet_Size : Column_Index;
65 Num_States : State_Index) is
66 record
67 Map : Mapping;
68 States : Regexp_Array (1 .. Num_States, 0 .. Alphabet_Size);
69 Is_Final : Boolean_Array (1 .. Num_States);
70 Case_Sensitive : Boolean;
71 end record;
72 -- Deterministic finite-state machine
73
74 -----------------------
75 -- Local Subprograms --
76 -----------------------
77
78 procedure Set
79 (Table : in out Regexp_Array_Access;
80 State : State_Index;
81 Column : Column_Index;
82 Value : State_Index);
83 -- Sets a value in the table. If the table is too small, reallocate it
84 -- dynamically so that (State, Column) is a valid index in it.
85
86 function Get
87 (Table : Regexp_Array_Access;
88 State : State_Index;
89 Column : Column_Index)
90 return State_Index;
91 -- Returns the value in the table at (State, Column).
92 -- If this index does not exist in the table, returns 0
93
94 procedure Free is new Ada.Unchecked_Deallocation
95 (Regexp_Array, Regexp_Array_Access);
96
97 ------------
98 -- Adjust --
99 ------------
100
101 procedure Adjust (R : in out Regexp) is
102 Tmp : Regexp_Access;
103
104 begin
105 Tmp := new Regexp_Value (Alphabet_Size => R.R.Alphabet_Size,
106 Num_States => R.R.Num_States);
107 Tmp.all := R.R.all;
108 R.R := Tmp;
109 end Adjust;
110
111 -------------
112 -- Compile --
113 -------------
114
115 function Compile
116 (Pattern : String;
117 Glob : Boolean := False;
118 Case_Sensitive : Boolean := True)
119 return Regexp
120 is
121 S : String := Pattern;
122 -- The pattern which is really compiled (when the pattern is case
123 -- insensitive, we convert this string to lower-cases
124
125 Map : Mapping := (others => 0);
126 -- Mapping between characters and columns in the tables
127
128 Alphabet_Size : Column_Index := 0;
129 -- Number of significant characters in the regular expression.
130 -- This total does not include special operators, such as *, (, ...
131
132 procedure Create_Mapping;
133 -- Creates a mapping between characters in the regexp and columns
134 -- in the tables representing the regexp. Test that the regexp is
135 -- well-formed Modifies Alphabet_Size and Map
136
137 procedure Create_Primary_Table
138 (Table : out Regexp_Array_Access;
139 Num_States : out State_Index;
140 Start_State : out State_Index;
141 End_State : out State_Index);
142 -- Creates the first version of the regexp (this is a non deterministic
143 -- finite state machine, which is unadapted for a fast pattern
144 -- matching algorithm). We use a recursive algorithm to process the
145 -- parenthesis sub-expressions.
146 --
147 -- Table : at the end of the procedure : Column 0 is for any character
148 -- ('.') and the last columns are for no character (closure)
149 -- Num_States is set to the number of states in the table
150 -- Start_State is the number of the starting state in the regexp
151 -- End_State is the number of the final state when the regexp matches
152
153 procedure Create_Primary_Table_Glob
154 (Table : out Regexp_Array_Access;
155 Num_States : out State_Index;
156 Start_State : out State_Index;
157 End_State : out State_Index);
158 -- Same function as above, but it deals with the second possible
159 -- grammar for 'globbing pattern', which is a kind of subset of the
160 -- whole regular expression grammar.
161
162 function Create_Secondary_Table
163 (First_Table : Regexp_Array_Access;
164 Num_States : State_Index;
165 Start_State : State_Index;
166 End_State : State_Index)
167 return Regexp;
168 -- Creates the definitive table representing the regular expression
169 -- This is actually a transformation of the primary table First_Table,
170 -- where every state is grouped with the states in its 'no-character'
171 -- columns. The transitions between the new states are then recalculated
172 -- and if necessary some new states are created.
173 --
174 -- Note that the resulting finite-state machine is not optimized in
175 -- terms of the number of states : it would be more time-consuming to
176 -- add a third pass to reduce the number of states in the machine, with
177 -- no speed improvement...
178
179 procedure Raise_Exception (M : String; Index : Integer);
180 pragma No_Return (Raise_Exception);
181 -- Raise an exception, indicating an error at character Index in S
182
183 --------------------
184 -- Create_Mapping --
185 --------------------
186
187 procedure Create_Mapping is
188
189 procedure Add_In_Map (C : Character);
190 -- Add a character in the mapping, if it is not already defined
191
192 ----------------
193 -- Add_In_Map --
194 ----------------
195
196 procedure Add_In_Map (C : Character) is
197 begin
198 if Map (C) = 0 then
199 Alphabet_Size := Alphabet_Size + 1;
200 Map (C) := Alphabet_Size;
201 end if;
202 end Add_In_Map;
203
204 J : Integer := S'First;
205 Parenthesis_Level : Integer := 0;
206 Curly_Level : Integer := 0;
207 Last_Open : Integer := S'First - 1;
208
209 -- Start of processing for Create_Mapping
210
211 begin
212 while J <= S'Last loop
213 case S (J) is
214 when Open_Bracket =>
215 J := J + 1;
216
217 if S (J) = '^' then
218 J := J + 1;
219 end if;
220
221 if S (J) = ']' or S (J) = '-' then
222 J := J + 1;
223 end if;
224
225 -- The first character never has a special meaning
226
227 loop
228 if J > S'Last then
229 Raise_Exception
230 ("Ran out of characters while parsing ", J);
231 end if;
232
233 exit when S (J) = Close_Bracket;
234
235 if S (J) = '-'
236 and then S (J + 1) /= Close_Bracket
237 then
238 declare
239 Start : constant Integer := J - 1;
240
241 begin
242 J := J + 1;
243
244 if S (J) = '\' then
245 J := J + 1;
246 end if;
247
248 for Char in S (Start) .. S (J) loop
249 Add_In_Map (Char);
250 end loop;
251 end;
252 else
253 if S (J) = '\' then
254 J := J + 1;
255 end if;
256
257 Add_In_Map (S (J));
258 end if;
259
260 J := J + 1;
261 end loop;
262
263 -- A close bracket must follow a open_bracket,
264 -- and cannot be found alone on the line
265
266 when Close_Bracket =>
267 Raise_Exception
268 ("Incorrect character ']' in regular expression", J);
269
270 when '\' =>
271 if J < S'Last then
272 J := J + 1;
273 Add_In_Map (S (J));
274
275 else
276 -- \ not allowed at the end of the regexp
277
278 Raise_Exception
279 ("Incorrect character '\' in regular expression", J);
280 end if;
281
282 when Open_Paren =>
283 if not Glob then
284 Parenthesis_Level := Parenthesis_Level + 1;
285 Last_Open := J;
286 else
287 Add_In_Map (Open_Paren);
288 end if;
289
290 when Close_Paren =>
291 if not Glob then
292 Parenthesis_Level := Parenthesis_Level - 1;
293
294 if Parenthesis_Level < 0 then
295 Raise_Exception
296 ("')' is not associated with '(' in regular "
297 & "expression", J);
298 end if;
299
300 if J = Last_Open + 1 then
301 Raise_Exception
302 ("Empty parenthesis not allowed in regular "
303 & "expression", J);
304 end if;
305
306 else
307 Add_In_Map (Close_Paren);
308 end if;
309
310 when '.' =>
311 if Glob then
312 Add_In_Map ('.');
313 end if;
314
315 when '{' =>
316 if not Glob then
317 Add_In_Map (S (J));
318 else
319 Curly_Level := Curly_Level + 1;
320 end if;
321
322 when '}' =>
323 if not Glob then
324 Add_In_Map (S (J));
325 else
326 Curly_Level := Curly_Level - 1;
327 end if;
328
329 when '*' | '?' =>
330 if not Glob then
331 if J = S'First then
332 Raise_Exception
333 ("'*', '+', '?' and '|' operators cannot be in "
334 & "first position in regular expression", J);
335 end if;
336 end if;
337
338 when '|' | '+' =>
339 if not Glob then
340 if J = S'First then
341
342 -- These operators must apply to a sub-expression,
343 -- and cannot be found at the beginning of the line
344
345 Raise_Exception
346 ("'*', '+', '?' and '|' operators cannot be in "
347 & "first position in regular expression", J);
348 end if;
349
350 else
351 Add_In_Map (S (J));
352 end if;
353
354 when others =>
355 Add_In_Map (S (J));
356 end case;
357
358 J := J + 1;
359 end loop;
360
361 -- A closing parenthesis must follow an open parenthesis
362
363 if Parenthesis_Level /= 0 then
364 Raise_Exception
365 ("'(' must always be associated with a ')'", J);
366 end if;
367
368 if Curly_Level /= 0 then
369 Raise_Exception
370 ("'{' must always be associated with a '}'", J);
371 end if;
372 end Create_Mapping;
373
374 --------------------------
375 -- Create_Primary_Table --
376 --------------------------
377
378 procedure Create_Primary_Table
379 (Table : out Regexp_Array_Access;
380 Num_States : out State_Index;
381 Start_State : out State_Index;
382 End_State : out State_Index)
383 is
384 Empty_Char : constant Column_Index := Alphabet_Size + 1;
385
386 Current_State : State_Index := 0;
387 -- Index of the last created state
388
389 procedure Add_Empty_Char
390 (State : State_Index;
391 To_State : State_Index);
392 -- Add a empty-character transition from State to To_State
393
394 procedure Create_Repetition
395 (Repetition : Character;
396 Start_Prev : State_Index;
397 End_Prev : State_Index;
398 New_Start : out State_Index;
399 New_End : in out State_Index);
400 -- Create the table in case we have a '*', '+' or '?'.
401 -- Start_Prev .. End_Prev should indicate respectively the start and
402 -- end index of the previous expression, to which '*', '+' or '?' is
403 -- applied.
404
405 procedure Create_Simple
406 (Start_Index : Integer;
407 End_Index : Integer;
408 Start_State : out State_Index;
409 End_State : out State_Index);
410 -- Fill the table for the regexp Simple.
411 -- This is the recursive procedure called to handle () expressions
412 -- If End_State = 0, then the call to Create_Simple creates an
413 -- independent regexp, not a concatenation
414 -- Start_Index .. End_Index is the starting index in the string S.
415 --
416 -- Warning: it may look like we are creating too many empty-string
417 -- transitions, but they are needed to get the correct regexp.
418 -- The table is filled as follow ( s means start-state, e means
419 -- end-state) :
420 --
421 -- regexp state_num | a b * empty_string
422 -- ------- ------------------------------
423 -- a 1 (s) | 2 - - -
424 -- 2 (e) | - - - -
425 --
426 -- ab 1 (s) | 2 - - -
427 -- 2 | - - - 3
428 -- 3 | - 4 - -
429 -- 4 (e) | - - - -
430 --
431 -- a|b 1 | 2 - - -
432 -- 2 | - - - 6
433 -- 3 | - 4 - -
434 -- 4 | - - - 6
435 -- 5 (s) | - - - 1,3
436 -- 6 (e) | - - - -
437 --
438 -- a* 1 | 2 - - -
439 -- 2 | - - - 4
440 -- 3 (s) | - - - 1,4
441 -- 4 (e) | - - - 3
442 --
443 -- (a) 1 (s) | 2 - - -
444 -- 2 (e) | - - - -
445 --
446 -- a+ 1 | 2 - - -
447 -- 2 | - - - 4
448 -- 3 (s) | - - - 1
449 -- 4 (e) | - - - 3
450 --
451 -- a? 1 | 2 - - -
452 -- 2 | - - - 4
453 -- 3 (s) | - - - 1,4
454 -- 4 (e) | - - - -
455 --
456 -- . 1 (s) | 2 2 2 -
457 -- 2 (e) | - - - -
458
459 function Next_Sub_Expression
460 (Start_Index : Integer;
461 End_Index : Integer)
462 return Integer;
463 -- Returns the index of the last character of the next sub-expression
464 -- in Simple. Index cannot be greater than End_Index.
465
466 --------------------
467 -- Add_Empty_Char --
468 --------------------
469
470 procedure Add_Empty_Char
471 (State : State_Index;
472 To_State : State_Index)
473 is
474 J : Column_Index := Empty_Char;
475
476 begin
477 while Get (Table, State, J) /= 0 loop
478 J := J + 1;
479 end loop;
480
481 Set (Table, State, J, To_State);
482 end Add_Empty_Char;
483
484 -----------------------
485 -- Create_Repetition --
486 -----------------------
487
488 procedure Create_Repetition
489 (Repetition : Character;
490 Start_Prev : State_Index;
491 End_Prev : State_Index;
492 New_Start : out State_Index;
493 New_End : in out State_Index)
494 is
495 begin
496 New_Start := Current_State + 1;
497
498 if New_End /= 0 then
499 Add_Empty_Char (New_End, New_Start);
500 end if;
501
502 Current_State := Current_State + 2;
503 New_End := Current_State;
504
505 Add_Empty_Char (End_Prev, New_End);
506 Add_Empty_Char (New_Start, Start_Prev);
507
508 if Repetition /= '+' then
509 Add_Empty_Char (New_Start, New_End);
510 end if;
511
512 if Repetition /= '?' then
513 Add_Empty_Char (New_End, New_Start);
514 end if;
515 end Create_Repetition;
516
517 -------------------
518 -- Create_Simple --
519 -------------------
520
521 procedure Create_Simple
522 (Start_Index : Integer;
523 End_Index : Integer;
524 Start_State : out State_Index;
525 End_State : out State_Index)
526 is
527 J : Integer := Start_Index;
528 Last_Start : State_Index := 0;
529
530 begin
531 Start_State := 0;
532 End_State := 0;
533 while J <= End_Index loop
534 case S (J) is
535 when Open_Paren =>
536 declare
537 J_Start : constant Integer := J + 1;
538 Next_Start : State_Index;
539 Next_End : State_Index;
540
541 begin
542 J := Next_Sub_Expression (J, End_Index);
543 Create_Simple (J_Start, J - 1, Next_Start, Next_End);
544
545 if J < End_Index
546 and then (S (J + 1) = '*' or else
547 S (J + 1) = '+' or else
548 S (J + 1) = '?')
549 then
550 J := J + 1;
551 Create_Repetition
552 (S (J),
553 Next_Start,
554 Next_End,
555 Last_Start,
556 End_State);
557
558 else
559 Last_Start := Next_Start;
560
561 if End_State /= 0 then
562 Add_Empty_Char (End_State, Last_Start);
563 end if;
564
565 End_State := Next_End;
566 end if;
567 end;
568
569 when '|' =>
570 declare
571 Start_Prev : constant State_Index := Start_State;
572 End_Prev : constant State_Index := End_State;
573 Start_J : constant Integer := J + 1;
574 Start_Next : State_Index := 0;
575 End_Next : State_Index := 0;
576
577 begin
578 J := Next_Sub_Expression (J, End_Index);
579
580 -- Create a new state for the start of the alternative
581
582 Current_State := Current_State + 1;
583 Last_Start := Current_State;
584 Start_State := Last_Start;
585
586 -- Create the tree for the second part of alternative
587
588 Create_Simple (Start_J, J, Start_Next, End_Next);
589
590 -- Create the end state
591
592 Add_Empty_Char (Last_Start, Start_Next);
593 Add_Empty_Char (Last_Start, Start_Prev);
594 Current_State := Current_State + 1;
595 End_State := Current_State;
596 Add_Empty_Char (End_Prev, End_State);
597 Add_Empty_Char (End_Next, End_State);
598 end;
599
600 when Open_Bracket =>
601 Current_State := Current_State + 1;
602
603 declare
604 Next_State : State_Index := Current_State + 1;
605
606 begin
607 J := J + 1;
608
609 if S (J) = '^' then
610 J := J + 1;
611
612 Next_State := 0;
613
614 for Column in 0 .. Alphabet_Size loop
615 Set (Table, Current_State, Column,
616 Value => Current_State + 1);
617 end loop;
618 end if;
619
620 -- Automatically add the first character
621
622 if S (J) = '-' or S (J) = ']' then
623 Set (Table, Current_State, Map (S (J)),
624 Value => Next_State);
625 J := J + 1;
626 end if;
627
628 -- Loop till closing bracket found
629
630 loop
631 exit when S (J) = Close_Bracket;
632
633 if S (J) = '-'
634 and then S (J + 1) /= ']'
635 then
636 declare
637 Start : constant Integer := J - 1;
638
639 begin
640 J := J + 1;
641
642 if S (J) = '\' then
643 J := J + 1;
644 end if;
645
646 for Char in S (Start) .. S (J) loop
647 Set (Table, Current_State, Map (Char),
648 Value => Next_State);
649 end loop;
650 end;
651
652 else
653 if S (J) = '\' then
654 J := J + 1;
655 end if;
656
657 Set (Table, Current_State, Map (S (J)),
658 Value => Next_State);
659 end if;
660 J := J + 1;
661 end loop;
662 end;
663
664 Current_State := Current_State + 1;
665
666 -- If the next symbol is a special symbol
667
668 if J < End_Index
669 and then (S (J + 1) = '*' or else
670 S (J + 1) = '+' or else
671 S (J + 1) = '?')
672 then
673 J := J + 1;
674 Create_Repetition
675 (S (J),
676 Current_State - 1,
677 Current_State,
678 Last_Start,
679 End_State);
680
681 else
682 Last_Start := Current_State - 1;
683
684 if End_State /= 0 then
685 Add_Empty_Char (End_State, Last_Start);
686 end if;
687
688 End_State := Current_State;
689 end if;
690
691 when '*' | '+' | '?' | Close_Paren | Close_Bracket =>
692 Raise_Exception
693 ("Incorrect character in regular expression :", J);
694
695 when others =>
696 Current_State := Current_State + 1;
697
698 -- Create the state for the symbol S (J)
699
700 if S (J) = '.' then
701 for K in 0 .. Alphabet_Size loop
702 Set (Table, Current_State, K,
703 Value => Current_State + 1);
704 end loop;
705
706 else
707 if S (J) = '\' then
708 J := J + 1;
709 end if;
710
711 Set (Table, Current_State, Map (S (J)),
712 Value => Current_State + 1);
713 end if;
714
715 Current_State := Current_State + 1;
716
717 -- If the next symbol is a special symbol
718
719 if J < End_Index
720 and then (S (J + 1) = '*' or else
721 S (J + 1) = '+' or else
722 S (J + 1) = '?')
723 then
724 J := J + 1;
725 Create_Repetition
726 (S (J),
727 Current_State - 1,
728 Current_State,
729 Last_Start,
730 End_State);
731
732 else
733 Last_Start := Current_State - 1;
734
735 if End_State /= 0 then
736 Add_Empty_Char (End_State, Last_Start);
737 end if;
738
739 End_State := Current_State;
740 end if;
741
742 end case;
743
744 if Start_State = 0 then
745 Start_State := Last_Start;
746 end if;
747
748 J := J + 1;
749 end loop;
750 end Create_Simple;
751
752 -------------------------
753 -- Next_Sub_Expression --
754 -------------------------
755
756 function Next_Sub_Expression
757 (Start_Index : Integer;
758 End_Index : Integer)
759 return Integer
760 is
761 J : Integer := Start_Index;
762 Start_On_Alter : Boolean := False;
763
764 begin
765 if S (J) = '|' then
766 Start_On_Alter := True;
767 end if;
768
769 loop
770 exit when J = End_Index;
771 J := J + 1;
772
773 case S (J) is
774 when '\' =>
775 J := J + 1;
776
777 when Open_Bracket =>
778 loop
779 J := J + 1;
780 exit when S (J) = Close_Bracket;
781
782 if S (J) = '\' then
783 J := J + 1;
784 end if;
785 end loop;
786
787 when Open_Paren =>
788 J := Next_Sub_Expression (J, End_Index);
789
790 when Close_Paren =>
791 return J;
792
793 when '|' =>
794 if Start_On_Alter then
795 return J - 1;
796 end if;
797
798 when others =>
799 null;
800 end case;
801 end loop;
802
803 return J;
804 end Next_Sub_Expression;
805
806 -- Start of Create_Primary_Table
807
808 begin
809 Table.all := (others => (others => 0));
810 Create_Simple (S'First, S'Last, Start_State, End_State);
811 Num_States := Current_State;
812 end Create_Primary_Table;
813
814 -------------------------------
815 -- Create_Primary_Table_Glob --
816 -------------------------------
817
818 procedure Create_Primary_Table_Glob
819 (Table : out Regexp_Array_Access;
820 Num_States : out State_Index;
821 Start_State : out State_Index;
822 End_State : out State_Index)
823 is
824 Empty_Char : constant Column_Index := Alphabet_Size + 1;
825
826 Current_State : State_Index := 0;
827 -- Index of the last created state
828
829 procedure Add_Empty_Char
830 (State : State_Index;
831 To_State : State_Index);
832 -- Add a empty-character transition from State to To_State
833
834 procedure Create_Simple
835 (Start_Index : Integer;
836 End_Index : Integer;
837 Start_State : out State_Index;
838 End_State : out State_Index);
839 -- Fill the table for the S (Start_Index .. End_Index).
840 -- This is the recursive procedure called to handle () expressions
841
842 --------------------
843 -- Add_Empty_Char --
844 --------------------
845
846 procedure Add_Empty_Char
847 (State : State_Index;
848 To_State : State_Index)
849 is
850 J : Column_Index := Empty_Char;
851
852 begin
853 while Get (Table, State, J) /= 0 loop
854 J := J + 1;
855 end loop;
856
857 Set (Table, State, J,
858 Value => To_State);
859 end Add_Empty_Char;
860
861 -------------------
862 -- Create_Simple --
863 -------------------
864
865 procedure Create_Simple
866 (Start_Index : Integer;
867 End_Index : Integer;
868 Start_State : out State_Index;
869 End_State : out State_Index)
870 is
871 J : Integer := Start_Index;
872 Last_Start : State_Index := 0;
873
874 begin
875 Start_State := 0;
876 End_State := 0;
877
878 while J <= End_Index loop
879 case S (J) is
880
881 when Open_Bracket =>
882 Current_State := Current_State + 1;
883
884 declare
885 Next_State : State_Index := Current_State + 1;
886
887 begin
888 J := J + 1;
889
890 if S (J) = '^' then
891 J := J + 1;
892 Next_State := 0;
893
894 for Column in 0 .. Alphabet_Size loop
895 Set (Table, Current_State, Column,
896 Value => Current_State + 1);
897 end loop;
898 end if;
899
900 -- Automatically add the first character
901
902 if S (J) = '-' or S (J) = ']' then
903 Set (Table, Current_State, Map (S (J)),
904 Value => Current_State);
905 J := J + 1;
906 end if;
907
908 -- Loop till closing bracket found
909
910 loop
911 exit when S (J) = Close_Bracket;
912
913 if S (J) = '-'
914 and then S (J + 1) /= ']'
915 then
916 declare
917 Start : constant Integer := J - 1;
918 begin
919 J := J + 1;
920
921 if S (J) = '\' then
922 J := J + 1;
923 end if;
924
925 for Char in S (Start) .. S (J) loop
926 Set (Table, Current_State, Map (Char),
927 Value => Next_State);
928 end loop;
929 end;
930
931 else
932 if S (J) = '\' then
933 J := J + 1;
934 end if;
935
936 Set (Table, Current_State, Map (S (J)),
937 Value => Next_State);
938 end if;
939 J := J + 1;
940 end loop;
941 end;
942
943 Last_Start := Current_State;
944 Current_State := Current_State + 1;
945
946 if End_State /= 0 then
947 Add_Empty_Char (End_State, Last_Start);
948 end if;
949
950 End_State := Current_State;
951
952 when '{' =>
953 declare
954 End_Sub : Integer;
955 Start_Regexp_Sub : State_Index;
956 End_Regexp_Sub : State_Index;
957 Create_Start : State_Index := 0;
958
959 Create_End : State_Index := 0;
960 -- Initialized to avoid junk warning
961
962 begin
963 while S (J) /= '}' loop
964
965 -- First step : find sub pattern
966
967 End_Sub := J + 1;
968 while S (End_Sub) /= ','
969 and then S (End_Sub) /= '}'
970 loop
971 End_Sub := End_Sub + 1;
972 end loop;
973
974 -- Second step : create a sub pattern
975
976 Create_Simple
977 (J + 1,
978 End_Sub - 1,
979 Start_Regexp_Sub,
980 End_Regexp_Sub);
981
982 J := End_Sub;
983
984 -- Third step : create an alternative
985
986 if Create_Start = 0 then
987 Current_State := Current_State + 1;
988 Create_Start := Current_State;
989 Add_Empty_Char (Create_Start, Start_Regexp_Sub);
990 Current_State := Current_State + 1;
991 Create_End := Current_State;
992 Add_Empty_Char (End_Regexp_Sub, Create_End);
993
994 else
995 Current_State := Current_State + 1;
996 Add_Empty_Char (Current_State, Create_Start);
997 Create_Start := Current_State;
998 Add_Empty_Char (Create_Start, Start_Regexp_Sub);
999 Add_Empty_Char (End_Regexp_Sub, Create_End);
1000 end if;
1001 end loop;
1002
1003 if End_State /= 0 then
1004 Add_Empty_Char (End_State, Create_Start);
1005 end if;
1006
1007 End_State := Create_End;
1008 Last_Start := Create_Start;
1009 end;
1010
1011 when '*' =>
1012 Current_State := Current_State + 1;
1013
1014 if End_State /= 0 then
1015 Add_Empty_Char (End_State, Current_State);
1016 end if;
1017
1018 Add_Empty_Char (Current_State, Current_State + 1);
1019 Add_Empty_Char (Current_State, Current_State + 3);
1020 Last_Start := Current_State;
1021
1022 Current_State := Current_State + 1;
1023
1024 for K in 0 .. Alphabet_Size loop
1025 Set (Table, Current_State, K,
1026 Value => Current_State + 1);
1027 end loop;
1028
1029 Current_State := Current_State + 1;
1030 Add_Empty_Char (Current_State, Current_State + 1);
1031
1032 Current_State := Current_State + 1;
1033 Add_Empty_Char (Current_State, Last_Start);
1034 End_State := Current_State;
1035
1036 when others =>
1037 Current_State := Current_State + 1;
1038
1039 if S (J) = '?' then
1040 for K in 0 .. Alphabet_Size loop
1041 Set (Table, Current_State, K,
1042 Value => Current_State + 1);
1043 end loop;
1044
1045 else
1046 if S (J) = '\' then
1047 J := J + 1;
1048 end if;
1049
1050 -- Create the state for the symbol S (J)
1051
1052 Set (Table, Current_State, Map (S (J)),
1053 Value => Current_State + 1);
1054 end if;
1055
1056 Last_Start := Current_State;
1057 Current_State := Current_State + 1;
1058
1059 if End_State /= 0 then
1060 Add_Empty_Char (End_State, Last_Start);
1061 end if;
1062
1063 End_State := Current_State;
1064
1065 end case;
1066
1067 if Start_State = 0 then
1068 Start_State := Last_Start;
1069 end if;
1070
1071 J := J + 1;
1072 end loop;
1073 end Create_Simple;
1074
1075 -- Start of processing for Create_Primary_Table_Glob
1076
1077 begin
1078 Table.all := (others => (others => 0));
1079 Create_Simple (S'First, S'Last, Start_State, End_State);
1080 Num_States := Current_State;
1081 end Create_Primary_Table_Glob;
1082
1083 ----------------------------
1084 -- Create_Secondary_Table --
1085 ----------------------------
1086
1087 function Create_Secondary_Table
1088 (First_Table : Regexp_Array_Access;
1089 Num_States : State_Index;
1090 Start_State : State_Index;
1091 End_State : State_Index) return Regexp
1092 is
1093 pragma Warnings (Off, Num_States);
1094
1095 Last_Index : constant State_Index := First_Table'Last (1);
1096 type Meta_State is array (1 .. Last_Index) of Boolean;
1097
1098 Table : Regexp_Array (1 .. Last_Index, 0 .. Alphabet_Size) :=
1099 (others => (others => 0));
1100
1101 Meta_States : array (1 .. Last_Index + 1) of Meta_State :=
1102 (others => (others => False));
1103
1104 Temp_State_Not_Null : Boolean;
1105
1106 Is_Final : Boolean_Array (1 .. Last_Index) := (others => False);
1107
1108 Current_State : State_Index := 1;
1109 Nb_State : State_Index := 1;
1110
1111 procedure Closure
1112 (State : in out Meta_State;
1113 Item : State_Index);
1114 -- Compute the closure of the state (that is every other state which
1115 -- has a empty-character transition) and add it to the state
1116
1117 -------------
1118 -- Closure --
1119 -------------
1120
1121 procedure Closure
1122 (State : in out Meta_State;
1123 Item : State_Index)
1124 is
1125 begin
1126 if State (Item) then
1127 return;
1128 end if;
1129
1130 State (Item) := True;
1131
1132 for Column in Alphabet_Size + 1 .. First_Table'Last (2) loop
1133 if First_Table (Item, Column) = 0 then
1134 return;
1135 end if;
1136
1137 Closure (State, First_Table (Item, Column));
1138 end loop;
1139 end Closure;
1140
1141 -- Start of processing for Create_Secondary_Table
1142
1143 begin
1144 -- Create a new state
1145
1146 Closure (Meta_States (Current_State), Start_State);
1147
1148 while Current_State <= Nb_State loop
1149
1150 -- If this new meta-state includes the primary table end state,
1151 -- then this meta-state will be a final state in the regexp
1152
1153 if Meta_States (Current_State)(End_State) then
1154 Is_Final (Current_State) := True;
1155 end if;
1156
1157 -- For every character in the regexp, calculate the possible
1158 -- transitions from Current_State
1159
1160 for Column in 0 .. Alphabet_Size loop
1161 Meta_States (Nb_State + 1) := (others => False);
1162 Temp_State_Not_Null := False;
1163
1164 for K in Meta_States (Current_State)'Range loop
1165 if Meta_States (Current_State)(K)
1166 and then First_Table (K, Column) /= 0
1167 then
1168 Closure
1169 (Meta_States (Nb_State + 1), First_Table (K, Column));
1170 Temp_State_Not_Null := True;
1171 end if;
1172 end loop;
1173
1174 -- If at least one transition existed
1175
1176 if Temp_State_Not_Null then
1177
1178 -- Check if this new state corresponds to an old one
1179
1180 for K in 1 .. Nb_State loop
1181 if Meta_States (K) = Meta_States (Nb_State + 1) then
1182 Table (Current_State, Column) := K;
1183 exit;
1184 end if;
1185 end loop;
1186
1187 -- If not, create a new state
1188
1189 if Table (Current_State, Column) = 0 then
1190 Nb_State := Nb_State + 1;
1191 Table (Current_State, Column) := Nb_State;
1192 end if;
1193 end if;
1194 end loop;
1195
1196 Current_State := Current_State + 1;
1197 end loop;
1198
1199 -- Returns the regexp
1200
1201 declare
1202 R : Regexp_Access;
1203
1204 begin
1205 R := new Regexp_Value (Alphabet_Size => Alphabet_Size,
1206 Num_States => Nb_State);
1207 R.Map := Map;
1208 R.Is_Final := Is_Final (1 .. Nb_State);
1209 R.Case_Sensitive := Case_Sensitive;
1210
1211 for State in 1 .. Nb_State loop
1212 for K in 0 .. Alphabet_Size loop
1213 R.States (State, K) := Table (State, K);
1214 end loop;
1215 end loop;
1216
1217 return (Ada.Finalization.Controlled with R => R);
1218 end;
1219 end Create_Secondary_Table;
1220
1221 ---------------------
1222 -- Raise_Exception --
1223 ---------------------
1224
1225 procedure Raise_Exception (M : String; Index : Integer) is
1226 begin
1227 raise Error_In_Regexp with M & " at offset " & Index'Img;
1228 end Raise_Exception;
1229
1230 -- Start of processing for Compile
1231
1232 begin
1233 -- Special case for the empty string: it always matches, and the
1234 -- following processing would fail on it.
1235 if S = "" then
1236 return (Ada.Finalization.Controlled with
1237 R => new Regexp_Value'
1238 (Alphabet_Size => 0,
1239 Num_States => 1,
1240 Map => (others => 0),
1241 States => (others => (others => 1)),
1242 Is_Final => (others => True),
1243 Case_Sensitive => True));
1244 end if;
1245
1246 if not Case_Sensitive then
1247 System.Case_Util.To_Lower (S);
1248 end if;
1249
1250 Create_Mapping;
1251
1252 -- Creates the primary table
1253
1254 declare
1255 Table : Regexp_Array_Access;
1256 Num_States : State_Index;
1257 Start_State : State_Index;
1258 End_State : State_Index;
1259 R : Regexp;
1260
1261 begin
1262 Table := new Regexp_Array (1 .. 100,
1263 0 .. Alphabet_Size + 10);
1264 if not Glob then
1265 Create_Primary_Table (Table, Num_States, Start_State, End_State);
1266 else
1267 Create_Primary_Table_Glob
1268 (Table, Num_States, Start_State, End_State);
1269 end if;
1270
1271 -- Creates the secondary table
1272
1273 R := Create_Secondary_Table
1274 (Table, Num_States, Start_State, End_State);
1275 Free (Table);
1276 return R;
1277 end;
1278 end Compile;
1279
1280 --------------
1281 -- Finalize --
1282 --------------
1283
1284 procedure Finalize (R : in out Regexp) is
1285 procedure Free is new
1286 Ada.Unchecked_Deallocation (Regexp_Value, Regexp_Access);
1287
1288 begin
1289 Free (R.R);
1290 end Finalize;
1291
1292 ---------
1293 -- Get --
1294 ---------
1295
1296 function Get
1297 (Table : Regexp_Array_Access;
1298 State : State_Index;
1299 Column : Column_Index) return State_Index
1300 is
1301 begin
1302 if State <= Table'Last (1)
1303 and then Column <= Table'Last (2)
1304 then
1305 return Table (State, Column);
1306 else
1307 return 0;
1308 end if;
1309 end Get;
1310
1311 -----------
1312 -- Match --
1313 -----------
1314
1315 function Match (S : String; R : Regexp) return Boolean is
1316 Current_State : State_Index := 1;
1317
1318 begin
1319 if R.R = null then
1320 raise Constraint_Error;
1321 end if;
1322
1323 for Char in S'Range loop
1324
1325 if R.R.Case_Sensitive then
1326 Current_State := R.R.States (Current_State, R.R.Map (S (Char)));
1327 else
1328 Current_State :=
1329 R.R.States (Current_State,
1330 R.R.Map (System.Case_Util.To_Lower (S (Char))));
1331 end if;
1332
1333 if Current_State = 0 then
1334 return False;
1335 end if;
1336
1337 end loop;
1338
1339 return R.R.Is_Final (Current_State);
1340 end Match;
1341
1342 ---------
1343 -- Set --
1344 ---------
1345
1346 procedure Set
1347 (Table : in out Regexp_Array_Access;
1348 State : State_Index;
1349 Column : Column_Index;
1350 Value : State_Index)
1351 is
1352 New_Lines : State_Index;
1353 New_Columns : Column_Index;
1354 New_Table : Regexp_Array_Access;
1355
1356 begin
1357 if State <= Table'Last (1)
1358 and then Column <= Table'Last (2)
1359 then
1360 Table (State, Column) := Value;
1361 else
1362 -- Doubles the size of the table until it is big enough that
1363 -- (State, Column) is a valid index
1364
1365 New_Lines := Table'Last (1) * (State / Table'Last (1) + 1);
1366 New_Columns := Table'Last (2) * (Column / Table'Last (2) + 1);
1367 New_Table := new Regexp_Array (Table'First (1) .. New_Lines,
1368 Table'First (2) .. New_Columns);
1369 New_Table.all := (others => (others => 0));
1370
1371 for J in Table'Range (1) loop
1372 for K in Table'Range (2) loop
1373 New_Table (J, K) := Table (J, K);
1374 end loop;
1375 end loop;
1376
1377 Free (Table);
1378 Table := New_Table;
1379 Table (State, Column) := Value;
1380 end if;
1381 end Set;
1382
1383 end System.Regexp;