From 52608d2d21d401ac243d84409ec4043731a4f21a Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Fri, 10 Jun 2005 08:32:27 +0000
Subject: [PATCH] Add notes about intended precision of opcodes. Remove dead
 floating point twiddles. Fix bug translating MAD->MUL,ADD.

---
 src/mesa/tnl/t_vb_arbprogram.c | 84 +++++++++++++++++++---------------
 1 file changed, 46 insertions(+), 38 deletions(-)

diff --git a/src/mesa/tnl/t_vb_arbprogram.c b/src/mesa/tnl/t_vb_arbprogram.c
index 4288bd3bef9..457d56071e4 100644
--- a/src/mesa/tnl/t_vb_arbprogram.c
+++ b/src/mesa/tnl/t_vb_arbprogram.c
@@ -61,45 +61,23 @@ struct compilation {
 
 #define ARB_VP_MACHINE(stage) ((struct arb_vp_machine *)(stage->privatePtr))
 
-
-
-/**
- * Set x to positive or negative infinity.
- *
- * XXX: FIXME - type punning.
- */
-#if defined(USE_IEEE) || defined(_WIN32)
-#define SET_POS_INFINITY(x)  ( *((GLuint *) (void *)&x) = 0x7F800000 )
-#define SET_NEG_INFINITY(x)  ( *((GLuint *) (void *)&x) = 0xFF800000 )
-#elif defined(VMS)
-#define SET_POS_INFINITY(x)  x = __MAXFLOAT
-#define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
-#define IS_INF_OR_NAN(t)   ((t) == __MAXFLOAT)
-#else
-#define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
-#define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
-#endif
-
-#define FREXPF(a,b) frexpf(a,b)
-
 #define PUFF(x) ((x)[1] = (x)[2] = (x)[3] = (x)[0])
 
-/* FIXME: more type punning (despite use of fi_type...)
- */
-#define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
 
 
+/* Lower precision functions for the EXP, LOG and LIT opcodes.  The
+ * LOG2() implementation is probably not accurate enough, and the
+ * attempted optimization for Exp2 is definitely not accurate
+ * enough - it discards all of t's fractional bits!
+ */
 static GLfloat RoughApproxLog2(GLfloat t)
 {
    return LOG2(t);
 }
 
-static GLfloat RoughApproxPow2(GLfloat t)
+static GLfloat RoughApproxExp2(GLfloat t)
 {   
 #if 0
-   /* This isn't nearly accurate enough - it discards all of t's
-    * fractional bits!
-    */
    fi_type fi;
    fi.i = (GLint) t;
    fi.i = (fi.i << 23) + 0x3f800000;
@@ -111,11 +89,25 @@ static GLfloat RoughApproxPow2(GLfloat t)
 
 static GLfloat RoughApproxPower(GLfloat x, GLfloat y)
 {
-#if 0
-   return RoughApproxPow2(y * RoughApproxLog2(x));
-#else
+   return RoughApproxExp2(y * RoughApproxLog2(x));
+}
+
+
+/* Higher precision functions for the EX2, LG2 and POW opcodes:
+ */
+static GLfloat ApproxLog2(GLfloat t)
+{
+   return (GLfloat) (log(t) * 1.442695F);
+}
+
+static GLfloat ApproxExp2(GLfloat t)
+{   
+   return (GLfloat) _mesa_pow(2.0, t);
+}
+
+static GLfloat ApproxPower(GLfloat x, GLfloat y)
+{
    return (GLfloat) _mesa_pow(x, y);
-#endif
 }
 
 
@@ -261,15 +253,20 @@ static void do_DST( struct arb_vp_machine *m, union instruction op )
 }
 
 
+/* Intended to be high precision:
+ */
 static void do_EX2( struct arb_vp_machine *m, union instruction op ) 
 {
    GLfloat *result = m->File[0][op.alu.dst];
    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 
-   result[0] = (GLfloat)RoughApproxPow2(arg0[0]);
+   result[0] = (GLfloat)ApproxExp2(arg0[0]);
    PUFF(result);
 }
 
+
+/* Allowed to be lower precision:
+ */
 static void do_EXP( struct arb_vp_machine *m, union instruction op )
 {
    GLfloat *result = m->File[0][op.alu.dst];
@@ -278,11 +275,11 @@ static void do_EXP( struct arb_vp_machine *m, union instruction op )
    GLfloat flr_tmp = FLOORF(tmp);
 
    /* KW: nvvertexec has an optimized version of this which is pretty
-    * hard to understand/validate, but avoids the RoughApproxPow2.
+    * hard to understand/validate, but avoids the RoughApproxExp2.
     */
    result[0] = (GLfloat) (1 << (int)flr_tmp);
    result[1] = tmp - flr_tmp;
-   result[2] = RoughApproxPow2(tmp);
+   result[2] = RoughApproxExp2(tmp);
    result[3] = 1.0F;
 }
 
@@ -308,12 +305,14 @@ static void do_FRC( struct arb_vp_machine *m, union instruction op )
    result[3] = arg0[3] - FLOORF(arg0[3]);
 }
 
+/* High precision log base 2:
+ */
 static void do_LG2( struct arb_vp_machine *m, union instruction op ) 
 {
    GLfloat *result = m->File[0][op.alu.dst];
    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
 
-   result[0] = RoughApproxLog2(arg0[0]);
+   result[0] = ApproxLog2(arg0[0]);
    PUFF(result);
 }
 
@@ -338,13 +337,15 @@ static void do_LIT( struct arb_vp_machine *m, union instruction op )
 }
 
 
+/* Intended to allow a lower precision than required for LG2 above.
+ */
 static void do_LOG( struct arb_vp_machine *m, union instruction op )
 {
    GLfloat *result = m->File[0][op.alu.dst];
    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
    GLfloat tmp = FABSF(arg0[0]);
    int exponent;
-   GLfloat mantissa = FREXPF(tmp, &exponent);
+   GLfloat mantissa = frexpf(tmp, &exponent);
 
    result[0] = (GLfloat) (exponent - 1);
    result[1] = 2.0 * mantissa; /* map [.5, 1) -> [1, 2) */
@@ -401,13 +402,15 @@ static void do_MUL( struct arb_vp_machine *m, union instruction op )
 }
 
 
+/* Intended to be "high" precision
+ */
 static void do_POW( struct arb_vp_machine *m, union instruction op ) 
 {
    GLfloat *result = m->File[0][op.alu.dst];
    const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
    const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
 
-   result[0] = (GLfloat)RoughApproxPower(arg0[0], arg1[0]);
+   result[0] = (GLfloat)ApproxPower(arg0[0], arg1[0]);
    PUFF(result);
 }
 
@@ -901,6 +904,11 @@ static void cvp_emit_inst( struct compilation *cp,
       op->alu.file1 = reg[2].file;
       op->alu.idx1 = reg[2].idx;
       op->alu.dst = result;
+
+      if (result == REG_RES) {
+	 op = cvp_next_instruction(cp);
+	 op->dword = fixup.dword;
+      }
       break;
 
    case VP_OPCODE_ARL:
-- 
2.30.2