r300: Fix cos & add scs to fragment program.
author Rune Peterson <rune@megahurts.dk>
Wed, 14 Feb 2007 22:10:52 +0000 (23:10 +0100)
committer Jerome Glisse <glisse@freedesktop.org>
Wed, 14 Feb 2007 22:10:52 +0000 (23:10 +0100)
This commit:
 - Fixes COS.
 - Adds range reduction for SIN & COS.
 - Adds SCS.
 - Removes the optimized version of SIN & COS.
 - Tweaks the weight constant (should help precision).
 - Fixes a copy-paste typo in emit_arith().

src/mesa/drivers/dri/r300/r300_context.h
src/mesa/drivers/dri/r300/r300_fragprog.c

index b1402351591ab3ddc604350a220bd95085b72f9e..48b50bca653bd78daf636a739033e0a8a619731c 100644 (file)
@@ -731,7 +731,7 @@ struct r300_fragment_program {
        int max_temp_idx;
 
        /* the index of the sin constant is stored here */
-       GLint const_sin;
+       GLint const_sin[2];
        
        GLuint optimization;
 };
index b00cf9ed33e5922706e45692265604c947c977c4..8e45bd54030762af28c48a421c0eb68da718576a 100644 (file)
@@ -33,7 +33,6 @@
 
 /*TODO'S
  *
- * - SCS instructions
  * - Depth write, WPOS/FOGC inputs
  * - FogOption
  * - Verify results of opcodes for accuracy, I've only checked them
@@ -1081,7 +1080,7 @@ static void emit_arith(struct r300_fragment_program *rp,
                                break;
                        }
                        if (emit_sop &&
-                           (s_swiz[REG_GET_VSWZ(src[i])].flags & SLOT_VECTOR)) {
+                           (s_swiz[REG_GET_SSWZ(src[i])].flags & SLOT_VECTOR)) {
                                vpos = spos = MAX2(vpos, spos);
                                break;
                        }
@@ -1204,6 +1203,25 @@ static GLuint get_attrib(struct r300_fragment_program *rp, GLuint attr)
 }
 #endif
 
+static void make_sin_const(struct r300_fragment_program *rp)
+{
+       if(rp->const_sin[0] == -1){
+           GLfloat cnstv[4];
+
+           cnstv[0] = 1.273239545; // 4/PI
+           cnstv[1] =-0.405284735; // -4/(PI*PI)
+           cnstv[2] = 3.141592654; // PI
+           cnstv[3] = 0.2225;      // weight
+           rp->const_sin[0] = emit_const4fv(rp, cnstv);
+
+           cnstv[0] = 0.5;
+           cnstv[1] = -1.5;
+           cnstv[2] = 0.159154943; // 1/(2*PI)
+           cnstv[3] = 6.283185307; // 2*PI
+           rp->const_sin[1] = emit_const4fv(rp, cnstv);
+       }
+}
+
 static GLboolean parse_program(struct r300_fragment_program *rp)
 {      
        struct gl_fragment_program *mp = &rp->mesa_program;
@@ -1260,84 +1278,68 @@ static GLboolean parse_program(struct r300_fragment_program *rp)
                         * cos using a parabola (see SIN):
                         * cos(x):
                         *   x += PI/2
-                        *   x = (x < PI)?x : x-2*PI
+                        *   x = (x/(2*PI))+0.5
+                        *   x = frac(x)
+                        *   x = (x*2*PI)-PI
                         *   result = sin(x)
                         */
                        temp = get_temp_reg(rp);
-                       if(rp->const_sin == -1){
-                           cnstv[0] = 1.273239545;
-                           cnstv[1] =-0.405284735;
-                           cnstv[2] = 3.141592654;
-                           cnstv[3] = 0.225;
-                           rp->const_sin = emit_const4fv(rp, cnstv);
-                       }
-                       cnst = rp->const_sin;                   
+                       make_sin_const(rp);
                        src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
 
-                       emit_arith(rp, PFS_OP_LG2, temp, WRITEMASK_W,
-                                  pfs_half,
-                                  undef,
-                                  undef,
-                                  0);
+                       /* add 0.5*PI and do range reduction */
 
                        emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
-                                  swizzle(cnst, Z, Z, Z, Z), //PI
+                                  swizzle(rp->const_sin[0], Z, Z, Z, Z), //PI
                                   pfs_half,
                                   swizzle(keep(src[0]), X, X, X, X),
                                   0);
 
-                       emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
-                                  negate(swizzle(temp, W, W, W, W)), //-2
-                                  swizzle(cnst, Z, Z, Z, Z), //PI
+                       emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
                                   swizzle(temp, X, X, X, X),
+                                  swizzle(rp->const_sin[1], Z, Z, Z, Z),
+                                  pfs_half,
                                   0);
 
-                       emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
-                                  swizzle(cnst, Z, Z, Z, Z), //PI
-                                  negate(pfs_half),
-                                  swizzle(src[0], X, X, X, X),
+                       emit_arith(rp, PFS_OP_FRC, temp, WRITEMASK_X,
+                                  swizzle(temp, X, X, X, X),
+                                  undef,
+                                  undef,
                                   0);
-                       
-                       emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_Z,
-                                  swizzle(temp, W, W, W, W),
+
+                       emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Z,
                                   swizzle(temp, X, X, X, X),
-                                  swizzle(temp, Y, Y, Y, Y), 
+                                  swizzle(rp->const_sin[1], W, W, W, W), //2*PI
+                                  negate(swizzle(rp->const_sin[0], Z, Z, Z, Z)), //-PI
                                   0);
 
                        /* SIN */
 
                        emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
                                   swizzle(temp, Z, Z, Z, Z),
-                                  cnst,
+                                  rp->const_sin[0],
                                   pfs_zero,
                                   0);
 
-                       if(rp->optimization == DRI_CONF_FP_OPTIMIZATION_SPEED){
-                           emit_arith(rp, PFS_OP_MAD, dest, mask,
-                                      swizzle(temp, Y, Y, Y, Y),
-                                      absolute(swizzle(temp, Z, Z, Z, Z)),
-                                      swizzle(temp, X, X, X, X),
-                                      flags);
-                       }else{
-                           emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
-                                      swizzle(temp, Y, Y, Y, Y),
-                                      absolute(swizzle(temp, Z, Z, Z, Z)),
-                                      swizzle(temp, X, X, X, X),
-                                      0);
+                       emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
+                                  swizzle(temp, Y, Y, Y, Y),
+                                  absolute(swizzle(temp, Z, Z, Z, Z)),
+                                  swizzle(temp, X, X, X, X),
+                                  0);
                        
-                           emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
-                                      swizzle(temp, X, X, X, X),
-                                      absolute(swizzle(temp, X, X, X, X)),
-                                      negate(swizzle(temp, X, X, X, X)),
-                                      0);
+                       emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
+                                  swizzle(temp, X, X, X, X),
+                                  absolute(swizzle(temp, X, X, X, X)),
+                                  negate(swizzle(temp, X, X, X, X)),
+                                  0);
 
 
-                           emit_arith(rp, PFS_OP_MAD, dest, mask,
-                                      swizzle(temp, Y, Y, Y, Y),
-                                      swizzle(cnst, W, W, W, W),
-                                      swizzle(temp, X, X, X, X),
-                                      flags);
-                       }
+                       emit_arith(rp, PFS_OP_MAD, dest, mask,
+                                  swizzle(temp, Y, Y, Y, Y),
+                                  swizzle(rp->const_sin[0], W, W, W, W),
+                                  swizzle(temp, X, X, X, X),
+                                  flags);
+
                        free_temp(rp, temp);
                        break;
                case OPCODE_DP3:
@@ -1577,7 +1579,93 @@ static GLboolean parse_program(struct r300_fragment_program *rp)
                                   flags);
                        break;
                case OPCODE_SCS:
-                       ERROR("SCS not implemented\n");
+                       /*
+                        * cos using a parabola (see SIN):
+                        * cos(x):
+                        *   x += PI/2
+                        *   x = (x/(2*PI))+0.5
+                        *   x = frac(x)
+                        *   x = (x*2*PI)-PI
+                        *   result = sin(x)
+                        */
+                       temp = get_temp_reg(rp);
+                       make_sin_const(rp);
+                       src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
+
+                       /* add 0.5*PI and do range reduction */
+
+                       emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X|WRITEMASK_Y,
+                                  swizzle(rp->const_sin[0], Z, Z, Z, Z),
+                                  rp->const_sin[1],
+                                  swizzle(keep(src[0]), X, X, X, X),
+                                  0);
+
+                       emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_W,
+                                  swizzle(rp->const_sin[0], Z, Z, Z, Z),
+                                  negate(pfs_half),
+                                  swizzle(keep(src[0]), X, X, X, X),
+                                  0);
+
+                       emit_arith(rp, PFS_OP_CMP, temp, WRITEMASK_Z,
+                                  swizzle(temp, X, X, X, X),
+                                  swizzle(temp, Y, Y, Y, Y),
+                                  swizzle(temp, W, W, W, W),
+                                  0);
+
+                       emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
+                                  swizzle(temp, Z, Z, Z, Z),
+                                  rp->const_sin[0],
+                                  pfs_zero,
+                                  0);
+
+                       emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
+                                  swizzle(temp, Y, Y, Y, Y),
+                                  absolute(swizzle(temp, Z, Z, Z, Z)),
+                                  swizzle(temp, X, X, X, X),
+                                  0);
+
+                       if(mask & WRITEMASK_Y)
+                       {
+                           emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
+                                      swizzle(keep(src[0]), X, X, X, X),
+                                      rp->const_sin[0],
+                                      pfs_zero,
+                                      0);
+
+                           emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
+                                      swizzle(temp, Y, Y, Y, Y),
+                                      absolute(swizzle(keep(src[0]), X, X, X, X)),
+                                      swizzle(temp, X, X, X, X),
+                                      0);
+                       }
+
+                       emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Z,
+                                  swizzle(temp, W, W, W, W),
+                                  absolute(swizzle(temp, W, W, W, W)),
+                                  negate(swizzle(temp, W, W, W, W)),
+                                  0);
+
+                       emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_X,
+                                  swizzle(temp, Z, Z, Z, Z),
+                                  swizzle(rp->const_sin[0], W, W, W, W),
+                                  swizzle(temp, W, W, W, W),
+                                  flags);
+
+                       if(mask & WRITEMASK_Y)
+                       {
+                           emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W,
+                                      swizzle(temp, X, X, X, X),
+                                      absolute(swizzle(temp, X, X, X, X)),
+                                      negate(swizzle(temp, X, X, X, X)),
+                                      0);
+
+                           emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_Y,
+                                      swizzle(temp, W, W, W, W),
+                                      swizzle(rp->const_sin[0], W, W, W, W),
+                                      swizzle(temp, X, X, X, X),
+                                      flags);
+                       }
+                       free_temp(rp, temp);
                        break;
                case OPCODE_SGE:
                        src[0] = t_src(rp, fpi->SrcReg[0]);
@@ -1603,48 +1691,56 @@ static GLboolean parse_program(struct r300_fragment_program *rp)
                         */
 
                        temp = get_temp_reg(rp);
-                       if(rp->const_sin == -1){
-                           cnstv[0] = 1.273239545;
-                           cnstv[1] =-0.405284735;
-                           cnstv[2] = 3.141592654;
-                           cnstv[3] = 0.225;
-                           rp->const_sin = emit_const4fv(rp, cnstv);
-                       }
-                       cnst = rp->const_sin;
+                       make_sin_const(rp);
                        src[0] = t_scalar_src(rp, fpi->SrcReg[0]);
 
-                       emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
+                       /* do range reduction */
+
+                       emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
                                   swizzle(keep(src[0]), X, X, X, X),
-                                  cnst,
+                                  swizzle(rp->const_sin[1], Z, Z, Z, Z),
+                                  pfs_half,
+                                  0);
+
+                       emit_arith(rp, PFS_OP_FRC, temp, WRITEMASK_X,
+                                  swizzle(temp, X, X, X, X),
+                                  undef,
+                                  undef,
+                                  0);
+
+                       emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Z,
+                                  swizzle(temp, X, X, X, X),
+                                  swizzle(rp->const_sin[1], W, W, W, W), //2*PI
+                                  negate(swizzle(rp->const_sin[0], Z, Z, Z, Z)), //PI
+                                  0);
+
+                       /* SIN */
+
+                       emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X | WRITEMASK_Y,
+                                  swizzle(temp, Z, Z, Z, Z),
+                                  rp->const_sin[0],
                                   pfs_zero,
                                   0);
 
-                       if(rp->optimization == DRI_CONF_FP_OPTIMIZATION_SPEED){
-                           emit_arith(rp, PFS_OP_MAD, dest, mask,
-                                      swizzle(temp, Y, Y, Y, Y),
-                                      absolute(swizzle(src[0], X, X, X, X)),
-                                      swizzle(temp, X, X, X, X),
-                                      flags);
-                       }else{
-                           emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
-                                      swizzle(temp, Y, Y, Y, Y),
-                                      absolute(swizzle(src[0], X, X, X, X)),
-                                      swizzle(temp, X, X, X, X),
-                                      0);
+                       emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_X,
+                                  swizzle(temp, Y, Y, Y, Y),
+                                  absolute(swizzle(temp, Z, Z, Z, Z)),
+                                  swizzle(temp, X, X, X, X),
+                                  0);
                        
-                           emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
-                                      swizzle(temp, X, X, X, X),
-                                      absolute(swizzle(temp, X, X, X, X)),
-                                      negate(swizzle(temp, X, X, X, X)),
-                                      0);
+                       emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_Y,
+                                  swizzle(temp, X, X, X, X),
+                                  absolute(swizzle(temp, X, X, X, X)),
+                                  negate(swizzle(temp, X, X, X, X)),
+                                  0);
 
 
-                           emit_arith(rp, PFS_OP_MAD, dest, mask,
-                                      swizzle(temp, Y, Y, Y, Y),
-                                      swizzle(cnst, W, W, W, W),
-                                      swizzle(temp, X, X, X, X),
-                                      flags);
-                       }
+                       emit_arith(rp, PFS_OP_MAD, dest, mask,
+                                  swizzle(temp, Y, Y, Y, Y),
+                                  swizzle(rp->const_sin[0], W, W, W, W),
+                                  swizzle(temp, X, X, X, X),
+                                  flags);
+
                        free_temp(rp, temp);
                        break;
                case OPCODE_SLT:
@@ -1739,7 +1835,7 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *rp)
        rp->max_temp_idx = 0;
        rp->node[0].alu_end = -1;
        rp->node[0].tex_end = -1;
-       rp->const_sin = -1;
+       rp->const_sin[0] = -1;
        
        _mesa_memset(cs, 0, sizeof(*rp->cs));
        for (i=0;i<PFS_MAX_ALU_INST;i++) {