/*@{*/
-/**
- inv_sqrt - A single precision 1/sqrt routine for IEEE format floats.
- written by Josh Vanderhoof, based on newsgroup posts by James Van Buskirk
- and Vesa Karvonen.
-*/
-float
-_mesa_inv_sqrtf(float n)
-{
-#if defined(USE_IEEE) && !defined(DEBUG)
- float r0, x0, y0;
- float r1, x1, y1;
- float r2, x2, y2;
-#if 0 /* not used, see below -BP */
- float r3, x3, y3;
-#endif
- fi_type u;
- unsigned int magic;
-
- /*
- Exponent part of the magic number -
-
- We want to:
- 1. subtract the bias from the exponent,
- 2. negate it
- 3. divide by two (rounding towards -inf)
- 4. add the bias back
-
- Which is the same as subtracting the exponent from 381 and dividing
- by 2.
-
- floor(-(x - 127) / 2) + 127 = floor((381 - x) / 2)
- */
-
- magic = 381 << 23;
-
- /*
- Significand part of magic number -
-
- With the current magic number, "(magic - u.i) >> 1" will give you:
-
- for 1 <= u.f <= 2: 1.25 - u.f / 4
- for 2 <= u.f <= 4: 1.00 - u.f / 8
-
- This isn't a bad approximation of 1/sqrt. The maximum difference from
- 1/sqrt will be around .06. After three Newton-Raphson iterations, the
- maximum difference is less than 4.5e-8. (Which is actually close
- enough to make the following bias academic...)
-
- To get a better approximation you can add a bias to the magic
- number. For example, if you subtract 1/2 of the maximum difference in
- the first approximation (.03), you will get the following function:
-
- for 1 <= u.f <= 2: 1.22 - u.f / 4
- for 2 <= u.f <= 3.76: 0.97 - u.f / 8
- for 3.76 <= u.f <= 4: 0.72 - u.f / 16
- (The 3.76 to 4 range is where the result is < .5.)
-
- This is the closest possible initial approximation, but with a maximum
- error of 8e-11 after three NR iterations, it is still not perfect. If
- you subtract 0.0332281 instead of .03, the maximum error will be
- 2.5e-11 after three NR iterations, which should be about as close as
- is possible.
-
- for 1 <= u.f <= 2: 1.2167719 - u.f / 4
- for 2 <= u.f <= 3.73: 0.9667719 - u.f / 8
- for 3.73 <= u.f <= 4: 0.7167719 - u.f / 16
-
- */
-
- magic -= (int)(0.0332281 * (1 << 25));
-
- u.f = n;
- u.i = (magic - u.i) >> 1;
-
- /*
- Instead of Newton-Raphson, we use Goldschmidt's algorithm, which
- allows more parallelism. From what I understand, the parallelism
- comes at the cost of less precision, because it lets error
- accumulate across iterations.
- */
- x0 = 1.0f;
- y0 = 0.5f * n;
- r0 = u.f;
-
- x1 = x0 * r0;
- y1 = y0 * r0 * r0;
- r1 = 1.5f - y1;
-
- x2 = x1 * r1;
- y2 = y1 * r1 * r1;
- r2 = 1.5f - y2;
-
-#if 1
- return x2 * r2; /* we can stop here, and be conformant -BP */
-#else
- x3 = x2 * r2;
- y3 = y2 * r2 * r2;
- r3 = 1.5f - y3;
-
- return x3 * r3;
-#endif
-#else
- return (float) (1.0 / sqrt(n));
-#endif
-}
-
#ifndef __GNUC__
/**
* Find the first bit set in a word.