.. _Floating_Point_Operations:
-Floating_Point_Operations
+Floating Point Operations
^^^^^^^^^^^^^^^^^^^^^^^^^
.. index:: Floating-Point Operations
* Optimization Levels::
* Debugging Optimized Code::
* Inlining of Subprograms::
-* Floating_Point_Operations::
+* Floating Point Operations::
* Vectorization of loops::
* Other Optimization Switches::
* Optimization and Strict Aliasing::
* Optimization Levels::
* Debugging Optimized Code::
* Inlining of Subprograms::
-* Floating_Point_Operations::
+* Floating Point Operations::
* Vectorization of loops::
* Other Optimization Switches::
* Optimization and Strict Aliasing::
on the resulting executable,
which removes both debugging information and global symbols.
-@node Inlining of Subprograms,Floating_Point_Operations,Debugging Optimized Code,Performance Considerations
+@node Inlining of Subprograms,Floating Point Operations,Debugging Optimized Code,Performance Considerations
@anchor{gnat_ugn/gnat_and_program_execution id32}@anchor{185}@anchor{gnat_ugn/gnat_and_program_execution inlining-of-subprograms}@anchor{100}
@subsubsection Inlining of Subprograms
indeed you should use @code{-O3} only if tests show that it actually
improves performance for your program.
-@node Floating_Point_Operations,Vectorization of loops,Inlining of Subprograms,Performance Considerations
+@node Floating Point Operations,Vectorization of loops,Inlining of Subprograms,Performance Considerations
@anchor{gnat_ugn/gnat_and_program_execution floating-point-operations}@anchor{186}@anchor{gnat_ugn/gnat_and_program_execution id33}@anchor{187}
-@subsubsection Floating_Point_Operations
+@subsubsection Floating Point Operations
@geindex Floating-Point Operations
so it is permissible to mix units compiled with and without these
switches.
-@node Vectorization of loops,Other Optimization Switches,Floating_Point_Operations,Performance Considerations
+@node Vectorization of loops,Other Optimization Switches,Floating Point Operations,Performance Considerations
@anchor{gnat_ugn/gnat_and_program_execution id34}@anchor{188}@anchor{gnat_ugn/gnat_and_program_execution vectorization-of-loops}@anchor{189}
@subsubsection Vectorization of loops
package body System.Val_Real is
- package Impl is new Value_R (Uns, Floating => True);
+ pragma Assert (Num'Machine_Mantissa <= Uns'Size);
+ -- We need an unsigned type large enough to represent the mantissa
+
+ Precision_Limit : constant Uns := 2**Num'Machine_Mantissa - 1;
+ -- Limit beyond which digits are dropped: the floating-point precision
+
+ package Impl is new Value_R (Uns, Precision_Limit, Floating => True);
function Integer_to_Real
(Str : String;
Val : Uns;
Base : Unsigned;
Scale : Integer;
+ Extra : Unsigned;
Minus : Boolean) return Num;
-- Convert the real value from integer to real representation
Val : Uns;
Base : Unsigned;
Scale : Integer;
+ Extra : Unsigned;
Minus : Boolean) return Num
is
pragma Assert (Base in 2 .. 16);
pragma Unsuppress (Range_Check);
R_Val : Num;
+ S : Integer := Scale;
begin
-- We call the floating-point processor reset routine so we can be sure
System.Float_Control.Reset;
end if;
- -- Compute the final value with a single rounding if possible
+ -- Take into account the extra digit near the limit to avoid anomalies
+
+ if Extra > 0 and then Val <= Precision_Limit / Uns (Base) then
+ R_Val := Num (Val * Uns (Base)) + Num (Extra);
+ S := S - 1;
+ else
+ R_Val := Num (Val);
+ end if;
+
+ -- Compute the final value
- if Scale < 0 then
- R_Val := Num (Val) / Num (Base) ** (-Scale);
+ if S < 0 then
+ R_Val := R_Val / Num (Base) ** (-S);
else
- R_Val := Num (Val) * Num (Base) ** Scale;
+ R_Val := R_Val * Num (Base) ** S;
end if;
-- Finally deal with initial minus sign, note that this processing is
Base : Unsigned;
Scale : Integer;
Extra : Unsigned;
- pragma Unreferenced (Extra);
Minus : Boolean;
Val : Uns;
begin
Val := Impl.Scan_Raw_Real (Str, Ptr, Max, Base, Scale, Extra, Minus);
- return Integer_to_Real (Str, Val, Base, Scale, Minus);
+ return Integer_to_Real (Str, Val, Base, Scale, Extra, Minus);
end Scan_Real;
----------------
Base : Unsigned;
Scale : Integer;
Extra : Unsigned;
- pragma Unreferenced (Extra);
Minus : Boolean;
Val : Uns;
begin
Val := Impl.Value_Raw_Real (Str, Base, Scale, Extra, Minus);
- return Integer_to_Real (Str, Val, Base, Scale, Minus);
+ return Integer_to_Real (Str, Val, Base, Scale, Extra, Minus);
end Value_Real;
end System.Val_Real;
package body System.Value_D is
- package Impl is new Value_R (Uns, Floating => False);
+ pragma Assert (Int'Size <= Uns'Size);
+ -- We need an unsigned type large enough to represent the mantissa
+
+ package Impl is new Value_R (Uns, 2**(Int'Size - 1), Floating => False);
function Integer_to_Decimal
(Str : String;
-- supported values for the base of the literal. Given that the largest
-- supported base is 16, this gives a limit of 2**(Int'Size - 5).
- package Impl is new Value_R (Uns, Floating => False);
+ pragma Assert (Int'Size <= Uns'Size);
+ -- We need an unsigned type large enough to represent the mantissa
+
+ package Impl is new Value_R (Uns, 2**(Int'Size - 1), Floating => False);
function Integer_To_Fixed
(Str : String;
package body System.Value_R is
- Precision_Limit : constant Uns := 2 ** (Uns'Size - 1);
- -- Limit beyond which additional digits are dropped
-
subtype Char_As_Digit is Unsigned range 0 .. 17;
subtype Valid_Digit is Char_As_Digit range 0 .. 15;
E_Digit : constant Char_As_Digit := 14;
Temp := Value * Uns (Base) + Uns (Digit);
+ -- Check if Temp is larger than Precision_Limit, taking into
+ -- account that Temp may have wrapped around.
+
if Value <= Umax
- or else (Value <= UmaxB and then Temp <= Precision_Limit)
+ or else (Value <= UmaxB
+ and then Temp <= Precision_Limit
+ and then Temp >= Uns (Base))
then
Value := Temp;
Scale := Scale - 1;
else
Temp := Value * Uns (Base) + Uns (Digit);
+ -- Check if Temp is larger than Precision_Limit, taking into
+ -- account that Temp may have wrapped around.
+
if Value <= Umax
- or else (Value <= UmaxB and then Temp <= Precision_Limit)
+ or else (Value <= UmaxB
+ and then Temp <= Precision_Limit
+ and then Temp >= Uns (Base))
then
Value := Temp;
type Uns is mod <>;
+ Precision_Limit : Uns;
+
Floating : Boolean;
package System.Value_R is