-- --
-- B o d y --
-- --
--- Copyright (C) 2002-2007, AdaCore --
+-- Copyright (C) 2002-2008, AdaCore --
-- --
-- GNAT is free software; you can redistribute it and/or modify it under --
-- terms of the GNU General Public License as published by the Free Soft- --
-- where f1 and f2 are functions that map strings into integers, and g is a
-- function that maps integers into [0, m-1]. h can be order preserving.
- -- For instance, let W = {w_0, ..., w_i, ...,
- -- w_m-1}, h can be defined such that h (w_i) = i.
+ -- For instance, let W = {w_0, ..., w_i, ..., w_m-1}, h can be defined
+ -- such that h (w_i) = i.
-- This algorithm defines two possible constructions of f1 and f2. Method
-- b) stores the hash function in less memory space at the expense of
-- probability of generating an acyclic graph, n >= 2m. If it is not
-- acyclic, Tk have to be regenerated.
- -- In the assignment step, the algorithm builds function g. As is acyclic,
- -- there is a vertex v1 with only one neighbor v2. Let w_i be the word such
- -- that v1 = f1 (w_i) and v2 = f2 (w_i). Let g (v1) = 0 by construction and
- -- g (v2) = (i - g (v1)) mod n (or to be general, (h (i) - g (v1) mod n).
+ -- In the assignment step, the algorithm builds function g. As G is
+ -- acyclic, there is a vertex v1 with only one neighbor v2. Let w_i be
+ -- the word such that v1 = f1 (w_i) and v2 = f2 (w_i). Let g (v1) = 0 by
+ -- construction and g (v2) = (i - g (v1)) mod n (or h (i) - g (v1) mod n).
-- If word w_j is such that v2 = f1 (w_j) and v3 = f2 (w_j), g (v3) = (j -
-- g (v2)) mod (or to be general, (h (j) - g (v2)) mod n). If w_i has no
-- neighbor, then another vertex is selected. The algorithm traverses G to
No_Edge : constant Edge_Id := -1;
No_Table : constant Table_Id := -1;
- Max_Word_Length : constant := 32;
- subtype Word_Type is String (1 .. Max_Word_Length);
- Null_Word : constant Word_Type := (others => ASCII.NUL);
- -- Store keyword in a word. Note that the length of word is limited to 32
- -- characters.
+ type Word_Type is new String_Access;
+ procedure Free_Word (W : in out Word_Type);
+ function New_Word (S : String) return Word_Type;
+
+ procedure Resize_Word (W : in out Word_Type; Len : Natural);
+ -- Resize string W to have a length Len
type Key_Type is record
Edge : Edge_Id;
package WT is new GNAT.Table (Word_Type, Word_Id, 0, 32, 32);
package IT is new GNAT.Table (Integer, Integer, 0, 32, 32);
- -- The two main tables. IT is used to store several tables of components
- -- containing only integers.
+ -- The two main tables. WT is used to store the words in their initial
+ -- version and in their reduced version (that is words reduced to
+ -- their significant characters). As an instance of GNAT.Table, WT does
+ -- not initialize string pointers to null. This initialization has to be
+ -- done manually when the table is allocated. IT is used to store several
+ -- tables of components containing only integers.
function Image (Int : Integer; W : Natural := 0) return String;
function Image (Str : String; W : Natural := 0) return String;
function Allocate (N : Natural; S : Natural := 1) return Table_Id;
-- Allocate N * S ints from IT table
- procedure Free_Tmp_Tables;
- -- Deallocate the tables used by the algorithm (but not the keys table)
-
----------
-- Keys --
----------
-- Optimization mode (memory vs CPU)
Max_Key_Len : Natural := 0;
- Min_Key_Len : Natural := Max_Word_Length;
+ Min_Key_Len : Natural := 0;
-- Maximum and minimum of all the word length
S : Natural;
procedure Apply_Position_Selection is
begin
- WT.Set_Last (2 * NK);
for J in 0 .. NK - 1 loop
declare
- I_Word : constant Word_Type := WT.Table (Initial (J));
- R_Word : Word_Type := Null_Word;
- Index : Natural := I_Word'First - 1;
+ IW : constant String := WT.Table (Initial (J)).all;
+ RW : String (1 .. IW'Length) := (others => ASCII.NUL);
+ N : Natural := IW'First - 1;
begin
-- Select the characters of Word included in the position
-- selection.
for C in 0 .. Char_Pos_Set_Len - 1 loop
- exit when I_Word (Get_Char_Pos (C)) = ASCII.NUL;
- Index := Index + 1;
- R_Word (Index) := I_Word (Get_Char_Pos (C));
+ exit when IW (Get_Char_Pos (C)) = ASCII.NUL;
+ N := N + 1;
+ RW (N) := IW (Get_Char_Pos (C));
end loop;
- -- Build the new table with the reduced word
+ -- Build the new table with the reduced word. Be careful
+ -- to deallocate the old version to avoid memory leaks.
- WT.Table (Reduced (J)) := R_Word;
+ Free_Word (WT.Table (Reduced (J)));
+ WT.Table (Reduced (J)) := New_Word (RW);
Set_Key (J, (Edge => No_Edge));
end;
end loop;
-- Start of processing for Assign_Values_To_Vertices
begin
- -- Value -1 denotes an uninitialized value as it is supposed to
+ -- Value -1 denotes an uninitialized value as it is supposed to
-- be in the range 0 .. NK.
if G = No_Table then
Success : Boolean := False;
begin
- NV := Natural (K2V * Float (NK));
-
- Keys := Allocate (NK);
+ if NK = 0 then
+ raise Program_Error with "keywords set cannot be empty";
+ end if;
if Verbose then
Put_Initial_Keys (Output, "Initial Key Table");
procedure Finalize is
begin
- Free_Tmp_Tables;
+ -- Deallocate all the WT components (both initial and reduced
+ -- ones) to avoid memory leaks.
+ for W in 0 .. WT.Last loop
+ Free_Word (WT.Table (W));
+ end loop;
WT.Release;
IT.Release;
- NK := 0;
- Max_Key_Len := 0;
- Min_Key_Len := Max_Word_Length;
- end Finalize;
-
- ---------------------
- -- Free_Tmp_Tables --
- ---------------------
-
- procedure Free_Tmp_Tables is
- begin
- IT.Init;
+ -- Reset all variables for next usage
Keys := No_Table;
Vertices := No_Table;
NV := 0;
- end Free_Tmp_Tables;
+
+ NK := 0;
+ Max_Key_Len := 0;
+ Min_Key_Len := 0;
+ end Finalize;
+
+ ---------------
+ -- Free_Word --
+ ---------------
+
+   procedure Free_Word (W : in out Word_Type) is
+   begin
+      --  A null word has nothing to release, so leave it untouched
+
+      if W = null then
+         return;
+      end if;
+
+      Free (W);
+   end Free_Word;
----------------------------
-- Generate_Mapping_Table --
Tries : Positive := Default_Tries)
is
begin
- -- Free previous tables (the settings may have changed between two runs)
+ -- Deallocate the part of the table concerning the reduced
+ -- words. Initial words are already present in the table. We
+ -- may have reduced words already there because a previous
+ -- computation failed. We are currently retrying and the
+ -- reduced words have to be deallocated.
+
+ for W in NK .. WT.Last loop
+ Free_Word (WT.Table (W));
+ end loop;
+ IT.Init;
- Free_Tmp_Tables;
+ -- Initialization of computation variables
- if K_To_V <= 2.0 then
- Put (Output, "K to V ratio cannot be lower than 2.0");
- New_Line (Output);
- raise Program_Error;
- end if;
+ Keys := No_Table;
+
+ Char_Pos_Set := No_Table;
+ Char_Pos_Set_Len := 0;
+
+ Used_Char_Set := No_Table;
+ Used_Char_Set_Len := 0;
+
+ T1 := No_Table;
+ T2 := No_Table;
+
+ T1_Len := 0;
+ T2_Len := 0;
+
+ G := No_Table;
+ G_Len := 0;
+
+ Edges := No_Table;
+ Edges_Len := 0;
+
+ Vertices := No_Table;
+ NV := 0;
S := Seed;
K2V := K_To_V;
Opt := Optim;
NT := Tries;
+
+ if K2V <= 2.0 then
+ raise Program_Error with "K to V ratio cannot be lower than 2.0";
+ end if;
+
+ -- Do not accept a value of K2V too close to 2.0 such that once
+ -- rounded up, NV = 2 * NK because the algorithm would not converge.
+
+ NV := Natural (Float (NK) * K2V);
+ if NV <= 2 * NK then
+ NV := 2 * NK + 1;
+ end if;
+
+ Keys := Allocate (NK);
+
+ -- Resize initial words to have all of them at the same size
+ -- (so the size of the largest one).
+
+ for K in 0 .. NK - 1 loop
+ Resize_Word (WT.Table (Initial (K)), Max_Key_Len);
+ end loop;
+
+      --  Allocate the table to store the reduced words. As WT is a
+      --  GNAT.Table (using C memory management), pointers have to be
+      --  explicitly initialized to null.
+
+      WT.Set_Last (Reduced (NK - 1));
+
+      --  Slot NK lies between the initial words and the reduced words
+      --  (Reduced (0) = NK + 1) and serves as the temporary location of
+      --  the sort. It is deallocated along with the reduced words, so it
+      --  must hold null rather than an uninitialized pointer.
+
+      WT.Table (NK) := null;
+
+      for W in 0 .. NK - 1 loop
+         WT.Table (Reduced (W)) := null;
+      end loop;
end Initialize;
------------
------------
procedure Insert (Value : String) is
- Word : Word_Type := Null_Word;
Len : constant Natural := Value'Length;
begin
- Word (1 .. Len) := Value (Value'First .. Value'First + Len - 1);
WT.Set_Last (NK);
- WT.Table (NK) := Word;
+ WT.Table (NK) := New_Word (Value);
NK := NK + 1;
- NV := Natural (Float (NK) * K2V);
-
- -- Do not accept a value of K2V too close to 2.0 such that once rounded
- -- up, NV = 2 * NK because the algorithm would not converge.
-
- if NV <= 2 * NK then
- NV := 2 * NK + 1;
- end if;
if Max_Key_Len < Len then
Max_Key_Len := Len;
end if;
- if Len < Min_Key_Len then
+ if Min_Key_Len = 0 or else Len < Min_Key_Len then
Min_Key_Len := Len;
end if;
end Insert;
end if;
end New_Line;
+ --------------
+ -- New_Word --
+ --------------
+
+   function New_Word (S : String) return Word_Type is
+      Copy : constant Word_Type := new String'(S);
+      --  Freshly allocated string initialized from S
+
+   begin
+      return Copy;
+   end New_Word;
+
------------------------------
-- Parse_Position_Selection --
------------------------------
K := Get_Key (J);
Put (File, Image (J, M), F1, L1, J, 1, 3, 1);
Put (File, Image (K.Edge, M), F1, L1, J, 1, 3, 2);
- Put (File, WT.Table (Initial (J)), F1, L1, J, 1, 3, 3);
+ Put (File, WT.Table (Initial (J)).all, F1, L1, J, 1, 3, 3);
end loop;
end Put_Initial_Keys;
K := Get_Key (J);
Put (File, Image (J, M), F1, L1, J, 1, 3, 1);
Put (File, Image (K.Edge, M), F1, L1, J, 1, 3, 2);
- Put (File, WT.Table (Reduced (J)), F1, L1, J, 1, 3, 3);
+ Put (File, WT.Table (Reduced (J)).all, F1, L1, J, 1, 3, 3);
end loop;
end Put_Reduced_Keys;
return K + NK + 1;
end Reduced;
+ -----------------
+ -- Resize_Word --
+ -----------------
+
+   procedure Resize_Word (W : in out Word_Type; Len : Natural) is
+      Old : constant String := W.all;
+      --  Copy of the current contents, taken before W is deallocated
+
+   begin
+      --  Only reallocate when the length actually changes
+
+      if Old'Length /= Len then
+         declare
+            Padded : String (1 .. Len) := (others => ASCII.NUL);
+            --  Replacement contents, filled with NUL beyond the old text
+
+         begin
+            Free_Word (W);
+            Padded (1 .. Old'Length) := Old;
+            W := New_Word (Padded);
+         end;
+      end if;
+   end Resize_Word;
+
--------------------------
-- Select_Char_Position --
--------------------------
begin
if L = 0 then
- Left := Reduced (0) - 1;
+ Left := NK;
Right := Offset + R;
elsif R = 0 then
Left := Offset + L;
- Right := Reduced (0) - 1;
+ Right := NK;
else
Left := Offset + L;
Right := Offset + R;
begin
if From = 0 then
- Source := Reduced (0) - 1;
+ Source := NK;
Target := Offset + To;
elsif To = 0 then
Source := Offset + From;
- Target := Reduced (0) - 1;
+ Target := NK;
else
Source := Offset + From;
Target := Offset + To;
end if;
WT.Table (Target) := WT.Table (Source);
+ WT.Table (Source) := null;
end Move;
package Sorting is new GNAT.Heap_Sort_G (Move, Lt);
begin
-- Initialize the reduced words set
- WT.Set_Last (2 * NK);
for K in 0 .. NK - 1 loop
- WT.Table (Reduced (K)) := WT.Table (Initial (K));
+ WT.Table (Reduced (K)) := New_Word (WT.Table (Initial (K)).all);
end loop;
declare
Same_Keys_Sets_Table (J).First ..
Same_Keys_Sets_Table (J).Last
loop
- Put (Output, WT.Table (Reduced (K)));
+ Put (Output, WT.Table (Reduced (K)).all);
New_Line (Output);
end loop;
Put (Output, "--");
-- --
-- S p e c --
-- --
--- Copyright (C) 2002-2005, AdaCore --
+-- Copyright (C) 2002-2008, AdaCore --
-- --
-- GNAT is free software; you can redistribute it and/or modify it under --
-- terms of the GNU General Public License as published by the Free Soft- --
-- The hash table size corresponds to the exact size of W and *no larger*.
-- This represents the "minimal" property.
--- The functions generated by this package require the key set to be known in
+-- The functions generated by this package require the words to be known in
-- advance (they are "static" hash functions). The hash functions are also
-- order preserving. If w2 is inserted after w1 in the generator, then (w1)
-- < f (w2). These hashing functions are convenient for use with realtime
K_To_V : Float := Default_K_To_V;
Optim : Optimization := CPU_Time;
Tries : Positive := Default_Tries);
- -- Initialize the generator and its internal structures. Set the ratio of
- -- vertices over keys in the random graphs. This value has to be greater
- -- than 2.0 in order for the algorithm to succeed. The key set is not
- -- modified (in particular when it is already set). For instance, it is
- -- possible to run several times the generator with different settings on
- -- the same key set.
+ -- Initialize the generator and its internal structures. Set the
+ -- ratio of vertices over keys in the random graphs. This value
+ -- has to be greater than 2.0 in order for the algorithm to
+ -- succeed. The word set is not modified (in particular when it is
+ -- already set). For instance, it is possible to run several times
+ -- the generator with different settings on the same words.
+
+ -- The usual approach is to Insert all the words and then to
+ -- invoke Initialize and Compute. If Compute fails to find a
+ -- perfect hash function, invoke Initialize again with
+ -- other configuration parameters (probably with a greater K_To_V
+ -- ratio). Once successful, invoke Produce and Finalize.
procedure Finalize;
- -- Deallocate the internal structures and the key table
+ -- Deallocate the internal structures and the words table
procedure Insert (Value : String);
- -- Insert a new key in the table
+ -- Insert a new word in the table
Too_Many_Tries : exception;
-- Raised after Tries unsuccessful runs
procedure Compute (Position : String := Default_Position);
-- Compute the hash function. Position allows to define selection of
- -- character positions used in the keywords hash function. Positions can be
+ -- character positions used in the word hash function. Positions can be
-- separated by commas and range like x-y may be used. Character '$'
- -- represents the final character of a key. With an empty position, the
+ -- represents the final character of a word. With an empty position, the
-- generator automatically produces positions to reduce the memory usage.
-- Raise Too_Many_Tries in case that the algorithm does not succeed in less
-- than Tries attempts (see Initialize).
-- F1 and F2 are two functions based on two function tables T1 and T2.
-- Their definition depends on the chosen optimization mode.
- -- Only some character positions are used in the keys because they are
+ -- Only some character positions are used in the words because they are
-- significant. They are listed in a character position table (P in the
-- pseudo-code below). For instance, in {"jan", "feb", "mar", "apr", "jun",
-- "jul", "aug", "sep", "oct", "nov", "dec"}, only positions 2 and 3 are
-- {2, 3}
-- When Optimization is CPU_Time, the first dimension of T1 and T2
- -- corresponds to the character position in the key and the second to the
+ -- corresponds to the character position in the word and the second to the
-- character set. As all the character set is not used, we define a used
-- character table which associates a distinct index to each used character
-- (unused characters are mapped to zero). In this case, the second
-- end Hash;
-- When Optimization is Memory_Space, the first dimension of T1 and T2
- -- corresponds to the character position in the key and the second
+ -- corresponds to the character position in the word and the second
-- dimension is ignored. T1 and T2 are no longer matrices but vectors.
-- Therefore, the used character table is not available. The hash function
-- has the following form: