From: Matt Turner Date: Fri, 20 Jul 2012 17:06:35 +0000 (-0700) Subject: Remove _mesa_inv_sqrtf in favor of 1/SQRTF X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=f58ba6ca9147137c7a2d31a1014235f7077b7752;p=mesa.git Remove _mesa_inv_sqrtf in favor of 1/SQRTF Except for a couple of explicit uses, _mesa_inv_sqrtf was disabled since its addition in 2003 (see f9b1e524). Reviewed-by: Brian Paul Reviewed-by: Kenneth Graunke --- diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c index fc30a6eb671..e7e877be002 100644 --- a/src/mesa/main/imports.c +++ b/src/mesa/main/imports.c @@ -244,112 +244,6 @@ _mesa_memset16( unsigned short *dst, unsigned short val, size_t n ) /*@{*/ -/** - inv_sqrt - A single precision 1/sqrt routine for IEEE format floats. - written by Josh Vanderhoof, based on newsgroup posts by James Van Buskirk - and Vesa Karvonen. -*/ -float -_mesa_inv_sqrtf(float n) -{ -#if defined(USE_IEEE) && !defined(DEBUG) - float r0, x0, y0; - float r1, x1, y1; - float r2, x2, y2; -#if 0 /* not used, see below -BP */ - float r3, x3, y3; -#endif - fi_type u; - unsigned int magic; - - /* - Exponent part of the magic number - - - We want to: - 1. subtract the bias from the exponent, - 2. negate it - 3. divide by two (rounding towards -inf) - 4. add the bias back - - Which is the same as subtracting the exponent from 381 and dividing - by 2. - - floor(-(x - 127) / 2) + 127 = floor((381 - x) / 2) - */ - - magic = 381 << 23; - - /* - Significand part of magic number - - - With the current magic number, "(magic - u.i) >> 1" will give you: - - for 1 <= u.f <= 2: 1.25 - u.f / 4 - for 2 <= u.f <= 4: 1.00 - u.f / 8 - - This isn't a bad approximation of 1/sqrt. The maximum difference from - 1/sqrt will be around .06. After three Newton-Raphson iterations, the - maximum difference is less than 4.5e-8. (Which is actually close - enough to make the following bias academic...) - - To get a better approximation you can add a bias to the magic - number. For example, if you subtract 1/2 of the maximum difference in - the first approximation (.03), you will get the following function: - - for 1 <= u.f <= 2: 1.22 - u.f / 4 - for 2 <= u.f <= 3.76: 0.97 - u.f / 8 - for 3.76 <= u.f <= 4: 0.72 - u.f / 16 - (The 3.76 to 4 range is where the result is < .5.) - - This is the closest possible initial approximation, but with a maximum - error of 8e-11 after three NR iterations, it is still not perfect. If - you subtract 0.0332281 instead of .03, the maximum error will be - 2.5e-11 after three NR iterations, which should be about as close as - is possible. - - for 1 <= u.f <= 2: 1.2167719 - u.f / 4 - for 2 <= u.f <= 3.73: 0.9667719 - u.f / 8 - for 3.73 <= u.f <= 4: 0.7167719 - u.f / 16 - - */ - - magic -= (int)(0.0332281 * (1 << 25)); - - u.f = n; - u.i = (magic - u.i) >> 1; - - /* - Instead of Newton-Raphson, we use Goldschmidt's algorithm, which - allows more parallelism. From what I understand, the parallelism - comes at the cost of less precision, because it lets error - accumulate across iterations. - */ - x0 = 1.0f; - y0 = 0.5f * n; - r0 = u.f; - - x1 = x0 * r0; - y1 = y0 * r0 * r0; - r1 = 1.5f - y1; - - x2 = x1 * r1; - y2 = y1 * r1 * r1; - r2 = 1.5f - y2; - -#if 1 - return x2 * r2; /* we can stop here, and be conformant -BP */ -#else - x3 = x2 * r2; - y3 = y2 * r2 * r2; - r3 = 1.5f - y3; - - return x3 * r3; -#endif -#else - return (float) (1.0 / sqrt(n)); -#endif -} - #ifndef __GNUC__ /** * Find the first bit set in a word. diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h index e825f21801b..254440093c5 100644 --- a/src/mesa/main/imports.h +++ b/src/mesa/main/imports.h @@ -105,11 +105,7 @@ typedef union { GLfloat f; GLint i; } fi_type; /*** *** INV_SQRTF: single-precision inverse square root ***/ -#if 0 -#define INV_SQRTF(X) _mesa_inv_sqrt(X) -#else -#define INV_SQRTF(X) (1.0F / SQRTF(X)) /* this is faster on a P4 */ -#endif +#define INV_SQRTF(X) (1.0F / SQRTF(X)) /** @@ -565,9 +561,6 @@ _mesa_realloc( void *oldBuffer, size_t oldSize, size_t newSize ); extern void _mesa_memset16( unsigned short *dst, unsigned short val, size_t n ); -extern float -_mesa_inv_sqrtf(float x); - #ifndef FFS_DEFINED #define FFS_DEFINED 1 diff --git a/src/mesa/tnl/t_rasterpos.c b/src/mesa/tnl/t_rasterpos.c index 50b5fcb4c63..a28ad0daf80 100644 --- a/src/mesa/tnl/t_rasterpos.c +++ b/src/mesa/tnl/t_rasterpos.c @@ -271,7 +271,7 @@ compute_texgen(struct gl_context *ctx, const GLfloat vObj[4], const GLfloat vEye rz = u[2] - normal[2] * two_nu; m = rx * rx + ry * ry + (rz + 1.0F) * (rz + 1.0F); if (m > 0.0F) - mInv = 0.5F * _mesa_inv_sqrtf(m); + mInv = 0.5F * INV_SQRTF(m); else mInv = 0.0F; diff --git a/src/mesa/tnl/t_vb_texgen.c b/src/mesa/tnl/t_vb_texgen.c index 61430c396d5..d4c788523db 100644 --- a/src/mesa/tnl/t_vb_texgen.c +++ b/src/mesa/tnl/t_vb_texgen.c @@ -117,7 +117,7 @@ static void build_m3( GLfloat f[][3], GLfloat m[], fz = f[i][2] = u[2] - norm[2] * two_nu; m[i] = fx * fx + fy * fy + (fz + 1.0F) * (fz + 1.0F); if (m[i] != 0.0F) { - m[i] = 0.5F * _mesa_inv_sqrtf(m[i]); + m[i] = 0.5F * INV_SQRTF(m[i]); } } } @@ -146,7 +146,7 @@ static void build_m2( GLfloat f[][3], GLfloat m[], fz = f[i][2] = u[2] - norm[2] * two_nu; m[i] = fx * fx + fy * fy + (fz + 1.0F) * (fz + 1.0F); if (m[i] != 0.0F) { - m[i] = 0.5F * _mesa_inv_sqrtf(m[i]); + m[i] = 0.5F * INV_SQRTF(m[i]); } } }