g-string.adb, [...]: Replace GNAT.xxx by System.xxx when appropriate.
[gcc.git] / gcc / ada / s-regexp.adb
1 ------------------------------------------------------------------------------
2 -- --
3 -- GNAT COMPILER COMPONENTS --
4 -- --
5 -- G N A T . R E G E X P --
6 -- --
7 -- B o d y --
8 -- --
9 -- Copyright (C) 1999-2007, AdaCore --
10 -- --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, --
20 -- Boston, MA 02110-1301, USA. --
21 -- --
22 -- As a special exception, if other files instantiate generics from this --
23 -- unit, or you link this unit with other files to produce an executable, --
24 -- this unit does not by itself cause the resulting executable to be --
25 -- covered by the GNU General Public License. This exception does not --
26 -- however invalidate any other reasons why the executable file might be --
27 -- covered by the GNU Public License. --
28 -- --
29 -- GNAT was originally developed by the GNAT team at New York University. --
30 -- Extensive contributions were provided by Ada Core Technologies Inc. --
31 -- --
32 ------------------------------------------------------------------------------
33
34 with Ada.Unchecked_Deallocation;
35 with Ada.Exceptions;
36
37 with System.Case_Util;
38
39 package body System.Regexp is
40
41 Open_Paren : constant Character := '(';
42 Close_Paren : constant Character := ')';
43 Open_Bracket : constant Character := '[';
44 Close_Bracket : constant Character := ']';
45
46 type State_Index is new Natural;
47 type Column_Index is new Natural;
48
49 type Regexp_Array is array
50 (State_Index range <>, Column_Index range <>) of State_Index;
51 -- First index is for the state number
52 -- Second index is for the character type
53 -- Contents is the new State
54
55 type Regexp_Array_Access is access Regexp_Array;
56 -- Use this type through the functions Set below, so that it
57 -- can grow dynamically depending on the needs.
58
59 type Mapping is array (Character'Range) of Column_Index;
60 -- Mapping between characters and column in the Regexp_Array
61
62 type Boolean_Array is array (State_Index range <>) of Boolean;
63
64 type Regexp_Value
65 (Alphabet_Size : Column_Index;
66 Num_States : State_Index) is
67 record
68 Map : Mapping;
69 States : Regexp_Array (1 .. Num_States, 0 .. Alphabet_Size);
70 Is_Final : Boolean_Array (1 .. Num_States);
71 Case_Sensitive : Boolean;
72 end record;
73 -- Deterministic finite-state machine
74
75 -----------------------
76 -- Local Subprograms --
77 -----------------------
78
79 procedure Set
80 (Table : in out Regexp_Array_Access;
81 State : State_Index;
82 Column : Column_Index;
83 Value : State_Index);
84 -- Sets a value in the table. If the table is too small, reallocate it
85 -- dynamically so that (State, Column) is a valid index in it.
86
87 function Get
88 (Table : Regexp_Array_Access;
89 State : State_Index;
90 Column : Column_Index)
91 return State_Index;
92 -- Returns the value in the table at (State, Column).
93 -- If this index does not exist in the table, returns 0
94
95 procedure Free is new Ada.Unchecked_Deallocation
96 (Regexp_Array, Regexp_Array_Access);
97
98 ------------
99 -- Adjust --
100 ------------
101
102 procedure Adjust (R : in out Regexp) is
103 Tmp : Regexp_Access;
104
105 begin
106 Tmp := new Regexp_Value (Alphabet_Size => R.R.Alphabet_Size,
107 Num_States => R.R.Num_States);
108 Tmp.all := R.R.all;
109 R.R := Tmp;
110 end Adjust;
111
112 -------------
113 -- Compile --
114 -------------
115
116 function Compile
117 (Pattern : String;
118 Glob : Boolean := False;
119 Case_Sensitive : Boolean := True)
120 return Regexp
121 is
122 S : String := Pattern;
123 -- The pattern which is really compiled (when the pattern is case
124 -- insensitive, we convert this string to lower-cases
125
126 Map : Mapping := (others => 0);
127 -- Mapping between characters and columns in the tables
128
129 Alphabet_Size : Column_Index := 0;
130 -- Number of significant characters in the regular expression.
131 -- This total does not include special operators, such as *, (, ...
132
133 procedure Create_Mapping;
134 -- Creates a mapping between characters in the regexp and columns
135 -- in the tables representing the regexp. Test that the regexp is
136 -- well-formed Modifies Alphabet_Size and Map
137
138 procedure Create_Primary_Table
139 (Table : out Regexp_Array_Access;
140 Num_States : out State_Index;
141 Start_State : out State_Index;
142 End_State : out State_Index);
143 -- Creates the first version of the regexp (this is a non determinist
144 -- finite state machine, which is unadapted for a fast pattern
145 -- matching algorithm). We use a recursive algorithm to process the
146 -- parenthesis sub-expressions.
147 --
148 -- Table : at the end of the procedure : Column 0 is for any character
149 -- ('.') and the last columns are for no character (closure)
150 -- Num_States is set to the number of states in the table
151 -- Start_State is the number of the starting state in the regexp
152 -- End_State is the number of the final state when the regexp matches
153
154 procedure Create_Primary_Table_Glob
155 (Table : out Regexp_Array_Access;
156 Num_States : out State_Index;
157 Start_State : out State_Index;
158 End_State : out State_Index);
159 -- Same function as above, but it deals with the second possible
160 -- grammar for 'globbing pattern', which is a kind of subset of the
161 -- whole regular expression grammar.
162
163 function Create_Secondary_Table
164 (First_Table : Regexp_Array_Access;
165 Num_States : State_Index;
166 Start_State : State_Index;
167 End_State : State_Index)
168 return Regexp;
169 -- Creates the definitive table representing the regular expression
170 -- This is actually a transformation of the primary table First_Table,
171 -- where every state is grouped with the states in its 'no-character'
172 -- columns. The transitions between the new states are then recalculated
173 -- and if necessary some new states are created.
174 --
175 -- Note that the resulting finite-state machine is not optimized in
176 -- terms of the number of states : it would be more time-consuming to
177 -- add a third pass to reduce the number of states in the machine, with
178 -- no speed improvement...
179
180 procedure Raise_Exception
181 (M : String;
182 Index : Integer);
183 pragma No_Return (Raise_Exception);
184 -- Raise an exception, indicating an error at character Index in S
185
186 --------------------
187 -- Create_Mapping --
188 --------------------
189
190 procedure Create_Mapping is
191
192 procedure Add_In_Map (C : Character);
193 -- Add a character in the mapping, if it is not already defined
194
195 ----------------
196 -- Add_In_Map --
197 ----------------
198
199 procedure Add_In_Map (C : Character) is
200 begin
201 if Map (C) = 0 then
202 Alphabet_Size := Alphabet_Size + 1;
203 Map (C) := Alphabet_Size;
204 end if;
205 end Add_In_Map;
206
207 J : Integer := S'First;
208 Parenthesis_Level : Integer := 0;
209 Curly_Level : Integer := 0;
210
211 -- Start of processing for Create_Mapping
212
213 begin
214 while J <= S'Last loop
215 case S (J) is
216 when Open_Bracket =>
217 J := J + 1;
218
219 if S (J) = '^' then
220 J := J + 1;
221 end if;
222
223 if S (J) = ']' or S (J) = '-' then
224 J := J + 1;
225 end if;
226
227 -- The first character never has a special meaning
228
229 loop
230 if J > S'Last then
231 Raise_Exception
232 ("Ran out of characters while parsing ", J);
233 end if;
234
235 exit when S (J) = Close_Bracket;
236
237 if S (J) = '-'
238 and then S (J + 1) /= Close_Bracket
239 then
240 declare
241 Start : constant Integer := J - 1;
242
243 begin
244 J := J + 1;
245
246 if S (J) = '\' then
247 J := J + 1;
248 end if;
249
250 for Char in S (Start) .. S (J) loop
251 Add_In_Map (Char);
252 end loop;
253 end;
254 else
255 if S (J) = '\' then
256 J := J + 1;
257 end if;
258
259 Add_In_Map (S (J));
260 end if;
261
262 J := J + 1;
263 end loop;
264
265 -- A close bracket must follow a open_bracket,
266 -- and cannot be found alone on the line
267
268 when Close_Bracket =>
269 Raise_Exception
270 ("Incorrect character ']' in regular expression", J);
271
272 when '\' =>
273 if J < S'Last then
274 J := J + 1;
275 Add_In_Map (S (J));
276
277 else
278 -- \ not allowed at the end of the regexp
279
280 Raise_Exception
281 ("Incorrect character '\' in regular expression", J);
282 end if;
283
284 when Open_Paren =>
285 if not Glob then
286 Parenthesis_Level := Parenthesis_Level + 1;
287 else
288 Add_In_Map (Open_Paren);
289 end if;
290
291 when Close_Paren =>
292 if not Glob then
293 Parenthesis_Level := Parenthesis_Level - 1;
294
295 if Parenthesis_Level < 0 then
296 Raise_Exception
297 ("')' is not associated with '(' in regular "
298 & "expression", J);
299 end if;
300
301 if S (J - 1) = Open_Paren then
302 Raise_Exception
303 ("Empty parenthesis not allowed in regular "
304 & "expression", J);
305 end if;
306
307 else
308 Add_In_Map (Close_Paren);
309 end if;
310
311 when '.' =>
312 if Glob then
313 Add_In_Map ('.');
314 end if;
315
316 when '{' =>
317 if not Glob then
318 Add_In_Map (S (J));
319 else
320 Curly_Level := Curly_Level + 1;
321 end if;
322
323 when '}' =>
324 if not Glob then
325 Add_In_Map (S (J));
326 else
327 Curly_Level := Curly_Level - 1;
328 end if;
329
330 when '*' | '?' =>
331 if not Glob then
332 if J = S'First then
333 Raise_Exception
334 ("'*', '+', '?' and '|' operators cannot be in "
335 & "first position in regular expression", J);
336 end if;
337 end if;
338
339 when '|' | '+' =>
340 if not Glob then
341 if J = S'First then
342
343 -- These operators must apply to a sub-expression,
344 -- and cannot be found at the beginning of the line
345
346 Raise_Exception
347 ("'*', '+', '?' and '|' operators cannot be in "
348 & "first position in regular expression", J);
349 end if;
350
351 else
352 Add_In_Map (S (J));
353 end if;
354
355 when others =>
356 Add_In_Map (S (J));
357 end case;
358
359 J := J + 1;
360 end loop;
361
362 -- A closing parenthesis must follow an open parenthesis
363
364 if Parenthesis_Level /= 0 then
365 Raise_Exception
366 ("'(' must always be associated with a ')'", J);
367 end if;
368
369 if Curly_Level /= 0 then
370 Raise_Exception
371 ("'{' must always be associated with a '}'", J);
372 end if;
373 end Create_Mapping;
374
375 --------------------------
376 -- Create_Primary_Table --
377 --------------------------
378
379 procedure Create_Primary_Table
380 (Table : out Regexp_Array_Access;
381 Num_States : out State_Index;
382 Start_State : out State_Index;
383 End_State : out State_Index)
384 is
385 Empty_Char : constant Column_Index := Alphabet_Size + 1;
386
387 Current_State : State_Index := 0;
388 -- Index of the last created state
389
390 procedure Add_Empty_Char
391 (State : State_Index;
392 To_State : State_Index);
393 -- Add a empty-character transition from State to To_State
394
395 procedure Create_Repetition
396 (Repetition : Character;
397 Start_Prev : State_Index;
398 End_Prev : State_Index;
399 New_Start : out State_Index;
400 New_End : in out State_Index);
401 -- Create the table in case we have a '*', '+' or '?'.
402 -- Start_Prev .. End_Prev should indicate respectively the start and
403 -- end index of the previous expression, to which '*', '+' or '?' is
404 -- applied.
405
406 procedure Create_Simple
407 (Start_Index : Integer;
408 End_Index : Integer;
409 Start_State : out State_Index;
410 End_State : out State_Index);
411 -- Fill the table for the regexp Simple.
412 -- This is the recursive procedure called to handle () expressions
413 -- If End_State = 0, then the call to Create_Simple creates an
414 -- independent regexp, not a concatenation
415 -- Start_Index .. End_Index is the starting index in the string S.
416 --
417 -- Warning: it may look like we are creating too many empty-string
418 -- transitions, but they are needed to get the correct regexp.
419 -- The table is filled as follow ( s means start-state, e means
420 -- end-state) :
421 --
422 -- regexp state_num | a b * empty_string
423 -- ------- ------------------------------
424 -- a 1 (s) | 2 - - -
425 -- 2 (e) | - - - -
426 --
427 -- ab 1 (s) | 2 - - -
428 -- 2 | - - - 3
429 -- 3 | - 4 - -
430 -- 4 (e) | - - - -
431 --
432 -- a|b 1 | 2 - - -
433 -- 2 | - - - 6
434 -- 3 | - 4 - -
435 -- 4 | - - - 6
436 -- 5 (s) | - - - 1,3
437 -- 6 (e) | - - - -
438 --
439 -- a* 1 | 2 - - -
440 -- 2 | - - - 4
441 -- 3 (s) | - - - 1,4
442 -- 4 (e) | - - - 3
443 --
444 -- (a) 1 (s) | 2 - - -
445 -- 2 (e) | - - - -
446 --
447 -- a+ 1 | 2 - - -
448 -- 2 | - - - 4
449 -- 3 (s) | - - - 1
450 -- 4 (e) | - - - 3
451 --
452 -- a? 1 | 2 - - -
453 -- 2 | - - - 4
454 -- 3 (s) | - - - 1,4
455 -- 4 (e) | - - - -
456 --
457 -- . 1 (s) | 2 2 2 -
458 -- 2 (e) | - - - -
459
460 function Next_Sub_Expression
461 (Start_Index : Integer;
462 End_Index : Integer)
463 return Integer;
464 -- Returns the index of the last character of the next sub-expression
465 -- in Simple. Index cannot be greater than End_Index.
466
467 --------------------
468 -- Add_Empty_Char --
469 --------------------
470
471 procedure Add_Empty_Char
472 (State : State_Index;
473 To_State : State_Index)
474 is
475 J : Column_Index := Empty_Char;
476
477 begin
478 while Get (Table, State, J) /= 0 loop
479 J := J + 1;
480 end loop;
481
482 Set (Table, State, J, To_State);
483 end Add_Empty_Char;
484
485 -----------------------
486 -- Create_Repetition --
487 -----------------------
488
489 procedure Create_Repetition
490 (Repetition : Character;
491 Start_Prev : State_Index;
492 End_Prev : State_Index;
493 New_Start : out State_Index;
494 New_End : in out State_Index)
495 is
496 begin
497 New_Start := Current_State + 1;
498
499 if New_End /= 0 then
500 Add_Empty_Char (New_End, New_Start);
501 end if;
502
503 Current_State := Current_State + 2;
504 New_End := Current_State;
505
506 Add_Empty_Char (End_Prev, New_End);
507 Add_Empty_Char (New_Start, Start_Prev);
508
509 if Repetition /= '+' then
510 Add_Empty_Char (New_Start, New_End);
511 end if;
512
513 if Repetition /= '?' then
514 Add_Empty_Char (New_End, New_Start);
515 end if;
516 end Create_Repetition;
517
518 -------------------
519 -- Create_Simple --
520 -------------------
521
522 procedure Create_Simple
523 (Start_Index : Integer;
524 End_Index : Integer;
525 Start_State : out State_Index;
526 End_State : out State_Index)
527 is
528 J : Integer := Start_Index;
529 Last_Start : State_Index := 0;
530
531 begin
532 Start_State := 0;
533 End_State := 0;
534 while J <= End_Index loop
535 case S (J) is
536 when Open_Paren =>
537 declare
538 J_Start : constant Integer := J + 1;
539 Next_Start : State_Index;
540 Next_End : State_Index;
541
542 begin
543 J := Next_Sub_Expression (J, End_Index);
544 Create_Simple (J_Start, J - 1, Next_Start, Next_End);
545
546 if J < End_Index
547 and then (S (J + 1) = '*' or else
548 S (J + 1) = '+' or else
549 S (J + 1) = '?')
550 then
551 J := J + 1;
552 Create_Repetition
553 (S (J),
554 Next_Start,
555 Next_End,
556 Last_Start,
557 End_State);
558
559 else
560 Last_Start := Next_Start;
561
562 if End_State /= 0 then
563 Add_Empty_Char (End_State, Last_Start);
564 end if;
565
566 End_State := Next_End;
567 end if;
568 end;
569
570 when '|' =>
571 declare
572 Start_Prev : constant State_Index := Start_State;
573 End_Prev : constant State_Index := End_State;
574 Start_J : constant Integer := J + 1;
575 Start_Next : State_Index := 0;
576 End_Next : State_Index := 0;
577
578 begin
579 J := Next_Sub_Expression (J, End_Index);
580
581 -- Create a new state for the start of the alternative
582
583 Current_State := Current_State + 1;
584 Last_Start := Current_State;
585 Start_State := Last_Start;
586
587 -- Create the tree for the second part of alternative
588
589 Create_Simple (Start_J, J, Start_Next, End_Next);
590
591 -- Create the end state
592
593 Add_Empty_Char (Last_Start, Start_Next);
594 Add_Empty_Char (Last_Start, Start_Prev);
595 Current_State := Current_State + 1;
596 End_State := Current_State;
597 Add_Empty_Char (End_Prev, End_State);
598 Add_Empty_Char (End_Next, End_State);
599 end;
600
601 when Open_Bracket =>
602 Current_State := Current_State + 1;
603
604 declare
605 Next_State : State_Index := Current_State + 1;
606
607 begin
608 J := J + 1;
609
610 if S (J) = '^' then
611 J := J + 1;
612
613 Next_State := 0;
614
615 for Column in 0 .. Alphabet_Size loop
616 Set (Table, Current_State, Column,
617 Value => Current_State + 1);
618 end loop;
619 end if;
620
621 -- Automatically add the first character
622
623 if S (J) = '-' or S (J) = ']' then
624 Set (Table, Current_State, Map (S (J)),
625 Value => Next_State);
626 J := J + 1;
627 end if;
628
629 -- Loop till closing bracket found
630
631 loop
632 exit when S (J) = Close_Bracket;
633
634 if S (J) = '-'
635 and then S (J + 1) /= ']'
636 then
637 declare
638 Start : constant Integer := J - 1;
639
640 begin
641 J := J + 1;
642
643 if S (J) = '\' then
644 J := J + 1;
645 end if;
646
647 for Char in S (Start) .. S (J) loop
648 Set (Table, Current_State, Map (Char),
649 Value => Next_State);
650 end loop;
651 end;
652
653 else
654 if S (J) = '\' then
655 J := J + 1;
656 end if;
657
658 Set (Table, Current_State, Map (S (J)),
659 Value => Next_State);
660 end if;
661 J := J + 1;
662 end loop;
663 end;
664
665 Current_State := Current_State + 1;
666
667 -- If the next symbol is a special symbol
668
669 if J < End_Index
670 and then (S (J + 1) = '*' or else
671 S (J + 1) = '+' or else
672 S (J + 1) = '?')
673 then
674 J := J + 1;
675 Create_Repetition
676 (S (J),
677 Current_State - 1,
678 Current_State,
679 Last_Start,
680 End_State);
681
682 else
683 Last_Start := Current_State - 1;
684
685 if End_State /= 0 then
686 Add_Empty_Char (End_State, Last_Start);
687 end if;
688
689 End_State := Current_State;
690 end if;
691
692 when '*' | '+' | '?' | Close_Paren | Close_Bracket =>
693 Raise_Exception
694 ("Incorrect character in regular expression :", J);
695
696 when others =>
697 Current_State := Current_State + 1;
698
699 -- Create the state for the symbol S (J)
700
701 if S (J) = '.' then
702 for K in 0 .. Alphabet_Size loop
703 Set (Table, Current_State, K,
704 Value => Current_State + 1);
705 end loop;
706
707 else
708 if S (J) = '\' then
709 J := J + 1;
710 end if;
711
712 Set (Table, Current_State, Map (S (J)),
713 Value => Current_State + 1);
714 end if;
715
716 Current_State := Current_State + 1;
717
718 -- If the next symbol is a special symbol
719
720 if J < End_Index
721 and then (S (J + 1) = '*' or else
722 S (J + 1) = '+' or else
723 S (J + 1) = '?')
724 then
725 J := J + 1;
726 Create_Repetition
727 (S (J),
728 Current_State - 1,
729 Current_State,
730 Last_Start,
731 End_State);
732
733 else
734 Last_Start := Current_State - 1;
735
736 if End_State /= 0 then
737 Add_Empty_Char (End_State, Last_Start);
738 end if;
739
740 End_State := Current_State;
741 end if;
742
743 end case;
744
745 if Start_State = 0 then
746 Start_State := Last_Start;
747 end if;
748
749 J := J + 1;
750 end loop;
751 end Create_Simple;
752
753 -------------------------
754 -- Next_Sub_Expression --
755 -------------------------
756
757 function Next_Sub_Expression
758 (Start_Index : Integer;
759 End_Index : Integer)
760 return Integer
761 is
762 J : Integer := Start_Index;
763 Start_On_Alter : Boolean := False;
764
765 begin
766 if S (J) = '|' then
767 Start_On_Alter := True;
768 end if;
769
770 loop
771 exit when J = End_Index;
772 J := J + 1;
773
774 case S (J) is
775 when '\' =>
776 J := J + 1;
777
778 when Open_Bracket =>
779 loop
780 J := J + 1;
781 exit when S (J) = Close_Bracket;
782
783 if S (J) = '\' then
784 J := J + 1;
785 end if;
786 end loop;
787
788 when Open_Paren =>
789 J := Next_Sub_Expression (J, End_Index);
790
791 when Close_Paren =>
792 return J;
793
794 when '|' =>
795 if Start_On_Alter then
796 return J - 1;
797 end if;
798
799 when others =>
800 null;
801 end case;
802 end loop;
803
804 return J;
805 end Next_Sub_Expression;
806
807 -- Start of Create_Primary_Table
808
809 begin
810 Table.all := (others => (others => 0));
811 Create_Simple (S'First, S'Last, Start_State, End_State);
812 Num_States := Current_State;
813 end Create_Primary_Table;
814
815 -------------------------------
816 -- Create_Primary_Table_Glob --
817 -------------------------------
818
819 procedure Create_Primary_Table_Glob
820 (Table : out Regexp_Array_Access;
821 Num_States : out State_Index;
822 Start_State : out State_Index;
823 End_State : out State_Index)
824 is
825 Empty_Char : constant Column_Index := Alphabet_Size + 1;
826
827 Current_State : State_Index := 0;
828 -- Index of the last created state
829
830 procedure Add_Empty_Char
831 (State : State_Index;
832 To_State : State_Index);
833 -- Add a empty-character transition from State to To_State
834
835 procedure Create_Simple
836 (Start_Index : Integer;
837 End_Index : Integer;
838 Start_State : out State_Index;
839 End_State : out State_Index);
840 -- Fill the table for the S (Start_Index .. End_Index).
841 -- This is the recursive procedure called to handle () expressions
842
843 --------------------
844 -- Add_Empty_Char --
845 --------------------
846
847 procedure Add_Empty_Char
848 (State : State_Index;
849 To_State : State_Index)
850 is
851 J : Column_Index := Empty_Char;
852
853 begin
854 while Get (Table, State, J) /= 0 loop
855 J := J + 1;
856 end loop;
857
858 Set (Table, State, J,
859 Value => To_State);
860 end Add_Empty_Char;
861
862 -------------------
863 -- Create_Simple --
864 -------------------
865
866 procedure Create_Simple
867 (Start_Index : Integer;
868 End_Index : Integer;
869 Start_State : out State_Index;
870 End_State : out State_Index)
871 is
872 J : Integer := Start_Index;
873 Last_Start : State_Index := 0;
874
875 begin
876 Start_State := 0;
877 End_State := 0;
878
879 while J <= End_Index loop
880 case S (J) is
881
882 when Open_Bracket =>
883 Current_State := Current_State + 1;
884
885 declare
886 Next_State : State_Index := Current_State + 1;
887
888 begin
889 J := J + 1;
890
891 if S (J) = '^' then
892 J := J + 1;
893 Next_State := 0;
894
895 for Column in 0 .. Alphabet_Size loop
896 Set (Table, Current_State, Column,
897 Value => Current_State + 1);
898 end loop;
899 end if;
900
901 -- Automatically add the first character
902
903 if S (J) = '-' or S (J) = ']' then
904 Set (Table, Current_State, Map (S (J)),
905 Value => Current_State);
906 J := J + 1;
907 end if;
908
909 -- Loop till closing bracket found
910
911 loop
912 exit when S (J) = Close_Bracket;
913
914 if S (J) = '-'
915 and then S (J + 1) /= ']'
916 then
917 declare
918 Start : constant Integer := J - 1;
919 begin
920 J := J + 1;
921
922 if S (J) = '\' then
923 J := J + 1;
924 end if;
925
926 for Char in S (Start) .. S (J) loop
927 Set (Table, Current_State, Map (Char),
928 Value => Next_State);
929 end loop;
930 end;
931
932 else
933 if S (J) = '\' then
934 J := J + 1;
935 end if;
936
937 Set (Table, Current_State, Map (S (J)),
938 Value => Next_State);
939 end if;
940 J := J + 1;
941 end loop;
942 end;
943
944 Last_Start := Current_State;
945 Current_State := Current_State + 1;
946
947 if End_State /= 0 then
948 Add_Empty_Char (End_State, Last_Start);
949 end if;
950
951 End_State := Current_State;
952
953 when '{' =>
954 declare
955 End_Sub : Integer;
956 Start_Regexp_Sub : State_Index;
957 End_Regexp_Sub : State_Index;
958 Create_Start : State_Index := 0;
959
960 Create_End : State_Index := 0;
961 -- Initialized to avoid junk warning
962
963 begin
964 while S (J) /= '}' loop
965
966 -- First step : find sub pattern
967
968 End_Sub := J + 1;
969 while S (End_Sub) /= ','
970 and then S (End_Sub) /= '}'
971 loop
972 End_Sub := End_Sub + 1;
973 end loop;
974
975 -- Second step : create a sub pattern
976
977 Create_Simple
978 (J + 1,
979 End_Sub - 1,
980 Start_Regexp_Sub,
981 End_Regexp_Sub);
982
983 J := End_Sub;
984
985 -- Third step : create an alternative
986
987 if Create_Start = 0 then
988 Current_State := Current_State + 1;
989 Create_Start := Current_State;
990 Add_Empty_Char (Create_Start, Start_Regexp_Sub);
991 Current_State := Current_State + 1;
992 Create_End := Current_State;
993 Add_Empty_Char (End_Regexp_Sub, Create_End);
994
995 else
996 Current_State := Current_State + 1;
997 Add_Empty_Char (Current_State, Create_Start);
998 Create_Start := Current_State;
999 Add_Empty_Char (Create_Start, Start_Regexp_Sub);
1000 Add_Empty_Char (End_Regexp_Sub, Create_End);
1001 end if;
1002 end loop;
1003
1004 if End_State /= 0 then
1005 Add_Empty_Char (End_State, Create_Start);
1006 end if;
1007
1008 End_State := Create_End;
1009 Last_Start := Create_Start;
1010 end;
1011
1012 when '*' =>
1013 Current_State := Current_State + 1;
1014
1015 if End_State /= 0 then
1016 Add_Empty_Char (End_State, Current_State);
1017 end if;
1018
1019 Add_Empty_Char (Current_State, Current_State + 1);
1020 Add_Empty_Char (Current_State, Current_State + 3);
1021 Last_Start := Current_State;
1022
1023 Current_State := Current_State + 1;
1024
1025 for K in 0 .. Alphabet_Size loop
1026 Set (Table, Current_State, K,
1027 Value => Current_State + 1);
1028 end loop;
1029
1030 Current_State := Current_State + 1;
1031 Add_Empty_Char (Current_State, Current_State + 1);
1032
1033 Current_State := Current_State + 1;
1034 Add_Empty_Char (Current_State, Last_Start);
1035 End_State := Current_State;
1036
1037 when others =>
1038 Current_State := Current_State + 1;
1039
1040 if S (J) = '?' then
1041 for K in 0 .. Alphabet_Size loop
1042 Set (Table, Current_State, K,
1043 Value => Current_State + 1);
1044 end loop;
1045
1046 else
1047 if S (J) = '\' then
1048 J := J + 1;
1049 end if;
1050
1051 -- Create the state for the symbol S (J)
1052
1053 Set (Table, Current_State, Map (S (J)),
1054 Value => Current_State + 1);
1055 end if;
1056
1057 Last_Start := Current_State;
1058 Current_State := Current_State + 1;
1059
1060 if End_State /= 0 then
1061 Add_Empty_Char (End_State, Last_Start);
1062 end if;
1063
1064 End_State := Current_State;
1065
1066 end case;
1067
1068 if Start_State = 0 then
1069 Start_State := Last_Start;
1070 end if;
1071
1072 J := J + 1;
1073 end loop;
1074 end Create_Simple;
1075
1076 -- Start of processing for Create_Primary_Table_Glob
1077
1078 begin
1079 Table.all := (others => (others => 0));
1080 Create_Simple (S'First, S'Last, Start_State, End_State);
1081 Num_States := Current_State;
1082 end Create_Primary_Table_Glob;
1083
1084 ----------------------------
1085 -- Create_Secondary_Table --
1086 ----------------------------
1087
1088 function Create_Secondary_Table
1089 (First_Table : Regexp_Array_Access;
1090 Num_States : State_Index;
1091 Start_State : State_Index;
1092 End_State : State_Index) return Regexp
1093 is
1094 pragma Warnings (Off, Num_States);
1095
1096 Last_Index : constant State_Index := First_Table'Last (1);
1097 type Meta_State is array (1 .. Last_Index) of Boolean;
1098
1099 Table : Regexp_Array (1 .. Last_Index, 0 .. Alphabet_Size) :=
1100 (others => (others => 0));
1101
1102 Meta_States : array (1 .. Last_Index + 1) of Meta_State :=
1103 (others => (others => False));
1104
1105 Temp_State_Not_Null : Boolean;
1106
1107 Is_Final : Boolean_Array (1 .. Last_Index) := (others => False);
1108
1109 Current_State : State_Index := 1;
1110 Nb_State : State_Index := 1;
1111
1112 procedure Closure
1113 (State : in out Meta_State;
1114 Item : State_Index);
1115 -- Compute the closure of the state (that is every other state which
1116 -- has a empty-character transition) and add it to the state
1117
1118 -------------
1119 -- Closure --
1120 -------------
1121
1122 procedure Closure
1123 (State : in out Meta_State;
1124 Item : State_Index)
1125 is
1126 begin
1127 if State (Item) then
1128 return;
1129 end if;
1130
1131 State (Item) := True;
1132
1133 for Column in Alphabet_Size + 1 .. First_Table'Last (2) loop
1134 if First_Table (Item, Column) = 0 then
1135 return;
1136 end if;
1137
1138 Closure (State, First_Table (Item, Column));
1139 end loop;
1140 end Closure;
1141
1142 -- Start of procesing for Create_Secondary_Table
1143
1144 begin
1145 -- Create a new state
1146
1147 Closure (Meta_States (Current_State), Start_State);
1148
1149 while Current_State <= Nb_State loop
1150
1151 -- If this new meta-state includes the primary table end state,
1152 -- then this meta-state will be a final state in the regexp
1153
1154 if Meta_States (Current_State)(End_State) then
1155 Is_Final (Current_State) := True;
1156 end if;
1157
1158 -- For every character in the regexp, calculate the possible
1159 -- transitions from Current_State
1160
1161 for Column in 0 .. Alphabet_Size loop
1162 Meta_States (Nb_State + 1) := (others => False);
1163 Temp_State_Not_Null := False;
1164
1165 for K in Meta_States (Current_State)'Range loop
1166 if Meta_States (Current_State)(K)
1167 and then First_Table (K, Column) /= 0
1168 then
1169 Closure
1170 (Meta_States (Nb_State + 1), First_Table (K, Column));
1171 Temp_State_Not_Null := True;
1172 end if;
1173 end loop;
1174
1175 -- If at least one transition existed
1176
1177 if Temp_State_Not_Null then
1178
1179 -- Check if this new state corresponds to an old one
1180
1181 for K in 1 .. Nb_State loop
1182 if Meta_States (K) = Meta_States (Nb_State + 1) then
1183 Table (Current_State, Column) := K;
1184 exit;
1185 end if;
1186 end loop;
1187
1188 -- If not, create a new state
1189
1190 if Table (Current_State, Column) = 0 then
1191 Nb_State := Nb_State + 1;
1192 Table (Current_State, Column) := Nb_State;
1193 end if;
1194 end if;
1195 end loop;
1196
1197 Current_State := Current_State + 1;
1198 end loop;
1199
1200 -- Returns the regexp
1201
1202 declare
1203 R : Regexp_Access;
1204
1205 begin
1206 R := new Regexp_Value (Alphabet_Size => Alphabet_Size,
1207 Num_States => Nb_State);
1208 R.Map := Map;
1209 R.Is_Final := Is_Final (1 .. Nb_State);
1210 R.Case_Sensitive := Case_Sensitive;
1211
1212 for State in 1 .. Nb_State loop
1213 for K in 0 .. Alphabet_Size loop
1214 R.States (State, K) := Table (State, K);
1215 end loop;
1216 end loop;
1217
1218 return (Ada.Finalization.Controlled with R => R);
1219 end;
1220 end Create_Secondary_Table;
1221
1222 ---------------------
1223 -- Raise_Exception --
1224 ---------------------
1225
1226 procedure Raise_Exception
1227 (M : String;
1228 Index : Integer)
1229 is
1230 begin
1231 Ada.Exceptions.Raise_Exception
1232 (Error_In_Regexp'Identity, M & " at offset " & Index'Img);
1233 end Raise_Exception;
1234
1235 -- Start of processing for Compile
1236
1237 begin
1238 -- Special case for the empty string: it always matches, and the
1239 -- following processing would fail on it.
1240 if S = "" then
1241 return (Ada.Finalization.Controlled with
1242 R => new Regexp_Value'
1243 (Alphabet_Size => 0,
1244 Num_States => 1,
1245 Map => (others => 0),
1246 States => (others => (others => 1)),
1247 Is_Final => (others => True),
1248 Case_Sensitive => True));
1249 end if;
1250
1251 if not Case_Sensitive then
1252 System.Case_Util.To_Lower (S);
1253 end if;
1254
1255 Create_Mapping;
1256
1257 -- Creates the primary table
1258
1259 declare
1260 Table : Regexp_Array_Access;
1261 Num_States : State_Index;
1262 Start_State : State_Index;
1263 End_State : State_Index;
1264 R : Regexp;
1265
1266 begin
1267 Table := new Regexp_Array (1 .. 100,
1268 0 .. Alphabet_Size + 10);
1269 if not Glob then
1270 Create_Primary_Table (Table, Num_States, Start_State, End_State);
1271 else
1272 Create_Primary_Table_Glob
1273 (Table, Num_States, Start_State, End_State);
1274 end if;
1275
1276 -- Creates the secondary table
1277
1278 R := Create_Secondary_Table
1279 (Table, Num_States, Start_State, End_State);
1280 Free (Table);
1281 return R;
1282 end;
1283 end Compile;
1284
1285 --------------
1286 -- Finalize --
1287 --------------
1288
1289 procedure Finalize (R : in out Regexp) is
1290 procedure Free is new
1291 Ada.Unchecked_Deallocation (Regexp_Value, Regexp_Access);
1292
1293 begin
1294 Free (R.R);
1295 end Finalize;
1296
1297 ---------
1298 -- Get --
1299 ---------
1300
1301 function Get
1302 (Table : Regexp_Array_Access;
1303 State : State_Index;
1304 Column : Column_Index) return State_Index
1305 is
1306 begin
1307 if State <= Table'Last (1)
1308 and then Column <= Table'Last (2)
1309 then
1310 return Table (State, Column);
1311 else
1312 return 0;
1313 end if;
1314 end Get;
1315
1316 -----------
1317 -- Match --
1318 -----------
1319
1320 function Match (S : String; R : Regexp) return Boolean is
1321 Current_State : State_Index := 1;
1322
1323 begin
1324 if R.R = null then
1325 raise Constraint_Error;
1326 end if;
1327
1328 for Char in S'Range loop
1329
1330 if R.R.Case_Sensitive then
1331 Current_State := R.R.States (Current_State, R.R.Map (S (Char)));
1332 else
1333 Current_State :=
1334 R.R.States (Current_State,
1335 R.R.Map (System.Case_Util.To_Lower (S (Char))));
1336 end if;
1337
1338 if Current_State = 0 then
1339 return False;
1340 end if;
1341
1342 end loop;
1343
1344 return R.R.Is_Final (Current_State);
1345 end Match;
1346
1347 ---------
1348 -- Set --
1349 ---------
1350
1351 procedure Set
1352 (Table : in out Regexp_Array_Access;
1353 State : State_Index;
1354 Column : Column_Index;
1355 Value : State_Index)
1356 is
1357 New_Lines : State_Index;
1358 New_Columns : Column_Index;
1359 New_Table : Regexp_Array_Access;
1360
1361 begin
1362 if State <= Table'Last (1)
1363 and then Column <= Table'Last (2)
1364 then
1365 Table (State, Column) := Value;
1366 else
1367 -- Doubles the size of the table until it is big enough that
1368 -- (State, Column) is a valid index
1369
1370 New_Lines := Table'Last (1) * (State / Table'Last (1) + 1);
1371 New_Columns := Table'Last (2) * (Column / Table'Last (2) + 1);
1372 New_Table := new Regexp_Array (Table'First (1) .. New_Lines,
1373 Table'First (2) .. New_Columns);
1374 New_Table.all := (others => (others => 0));
1375
1376 for J in Table'Range (1) loop
1377 for K in Table'Range (2) loop
1378 New_Table (J, K) := Table (J, K);
1379 end loop;
1380 end loop;
1381
1382 Free (Table);
1383 Table := New_Table;
1384 Table (State, Column) := Value;
1385 end if;
1386 end Set;
1387
1388 end System.Regexp;