[r300] Fix KIL instruction and swizzling of texture coordinates
author Nicolai Haehnle <nhaehnle@gmail.com>
Sun, 1 Jun 2008 11:13:32 +0000 (13:13 +0200)
committer Nicolai Haehnle <nhaehnle@gmail.com>
Sun, 1 Jun 2008 11:23:53 +0000 (13:23 +0200)
The KIL instruction only works if at least one texture unit is enabled
in hardware.

Texture instructions do not natively support swizzles, negations, etc.,
so we now emit an explicit arithmetic operation that applies them
whenever the texture coordinate requires it.

This fixes the Piglit fp-kil test.

src/mesa/drivers/dri/r300/r300_fragprog.c
src/mesa/drivers/dri/r300/r300_state.c

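diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c
index a28841dda8f89b857e7587f94d005406ff00ede1..ac45da440494150189d18fd688c1e749e7461ae1 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog.c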
@@ -903,49 +903,59 @@ static void emit_tex(struct r300_fragment_program *fp,
        int hwsrc, hwdest;
        GLuint tempreg = 0;
 
+       /**
+        * Hardware uses [0..1]x[0..1] range for rectangle textures
+        * instead of [0..Width]x[0..Height].
+        * Add a scaling instruction.
+        *
+        * \todo Refactor this once we have proper rewriting/optimization
+        * support for programs.
+        */
+       if (opcode != R300_TEX_OP_KIL && fpi->TexSrcTarget == TEXTURE_RECT_INDEX) {
+               gl_state_index tokens[STATE_LENGTH] = {
+                       STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
+                       0
+               };
+               int factor_index;
+               GLuint factorreg;
+
+               tokens[2] = unit;
+               factor_index =
+                       _mesa_add_state_reference(fp->mesa_program.Base.
+                                               Parameters, tokens);
+               factorreg =
+                       emit_const4fv(fp,
+                               fp->mesa_program.Base.Parameters->
+                               ParameterValues[factor_index]);
+               tempreg = keep(get_temp_reg(fp));
+
+               emit_arith(fp, PFS_OP_MAD, tempreg, WRITEMASK_XYZW,
+                       coord, factorreg, pfs_zero, 0);
+
+               coord = tempreg;
+       }
+
+       /* Texture operations do not support swizzles etc. in hardware,
+        * so emit an additional arithmetic operation if necessary.
+        */
+       if (REG_GET_VSWZ(coord) != SWIZZLE_XYZ ||
+           REG_GET_SSWZ(coord) != SWIZZLE_W ||
+           coord & (REG_NEGV_MASK | REG_NEGS_MASK | REG_ABS_MASK)) {
+               assert(tempreg == 0);
+               tempreg = keep(get_temp_reg(fp));
+               emit_arith(fp, PFS_OP_MAD, tempreg, WRITEMASK_XYZW,
+                       coord, pfs_one, pfs_zero, 0);
+               coord = tempreg;
+       }
+
+       /* Ensure correct node indirection */
        uin = cs->used_in_node;
        din = cs->dest_in_node;
 
        /* Resolve source/dest to hardware registers */
-       if (opcode != R300_TEX_OP_KIL) {
-               if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) {
-                       /**
-                        * Hardware uses [0..1]x[0..1] range for rectangle textures
-                        * instead of [0..Width]x[0..Height].
-                        * Add a scaling instruction.
-                        *
-                        * \todo Refactor this once we have proper rewriting/optimization
-                        * support for programs.
-                        */
-                       gl_state_index tokens[STATE_LENGTH] = {
-                               STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
-                               0
-                       };
-                       int factor_index;
-                       GLuint factorreg;
-
-                       tokens[2] = unit;
-                       factor_index =
-                           _mesa_add_state_reference(fp->mesa_program.Base.
-                                                     Parameters, tokens);
-                       factorreg =
-                           emit_const4fv(fp,
-                                         fp->mesa_program.Base.Parameters->
-                                         ParameterValues[factor_index]);
-                       tempreg = keep(get_temp_reg(fp));
-
-                       emit_arith(fp, PFS_OP_MAD, tempreg, WRITEMASK_XYZW,
-                                  coord, factorreg, pfs_zero, 0);
-
-                       /* Ensure correct node indirection */
-                       uin = cs->used_in_node;
-                       din = cs->dest_in_node;
-
-                       hwsrc = t_hw_src(fp, tempreg, GL_TRUE);
-               } else {
-                       hwsrc = t_hw_src(fp, coord, GL_TRUE);
-               }
+       hwsrc = t_hw_src(fp, coord, GL_TRUE);
 
+       if (opcode != R300_TEX_OP_KIL) {
                dest = t_dst(fp, fpi->DstReg);
 
                /* r300 doesn't seem to be able to do TEX->output reg */
@@ -972,7 +982,6 @@ static void emit_tex(struct r300_fragment_program *fp,
        } else {
                hwdest = 0;
                unit = 0;
-               hwsrc = t_hw_src(fp, coord, GL_TRUE);
        }
 
        /* Indirection if source has been written in this node, or if the
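diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index dc14759c08160f173f0b7262b8d795a4fef4580a..df4573d6b2200928fc5f4c37a391c5076741863e 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c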
@@ -1337,13 +1337,13 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
                int unit;
                int opcode;
                unsigned long val;
-                       
+
                unit = fp->tex.inst[i] >> R300_TEX_ID_SHIFT;
                unit &= 15;
-                       
+
                val = fp->tex.inst[i];
                val &= ~R300_TEX_ID_MASK;
-                       
+
                opcode =
                        (val & R300_TEX_INST_MASK) >> R300_TEX_INST_SHIFT;
                if (opcode == R300_TEX_OP_KIL) {
@@ -1361,10 +1361,9 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
                        }
                }
        }
-       
+
        r300->hw.fpt.cmd[R300_FPT_CMD_0] =
                cmdpacket0(R300_US_TEX_INST_0, fp->tex.length);
-       
 }
 
 static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
@@ -1384,7 +1383,7 @@ static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
                        unit = (val >> 16) & 0xf;
 
                        val &= ~(0xf << 16);
-                       
+
                        opcode = val & (0x7 << 22);
                        if (opcode == R500_TEX_INST_TEXKILL) {
                                new_unit = 0;
@@ -1515,10 +1514,17 @@ static void r300SetupTextures(GLcontext * ctx)
        if (!fp)                /* should only happenen once, just after context is created */
                return;
 
-
-        if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515)
+       if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
+               if (fp->mesa_program.UsesKill && last_hw_tmu < 0) {
+                       // The KILL operation requires the first texture unit
+                       // to be enabled.
+                       r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1;
+                       r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0;
+                       r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
+                               cmdpacket0(R300_TX_FILTER0_0, 1);
+               }
                r300SetupFragmentShaderTextures(ctx, tmu_mappings);
-       else 
+       } else
                r500SetupFragmentShaderTextures(ctx, tmu_mappings);
 
        if (RADEON_DEBUG & DEBUG_STATE)
@@ -1623,7 +1629,7 @@ static void r300SetupRSUnit(GLcontext * ctx)
                };
 
                r300->hw.ri.cmd[R300_RI_INTERP_0 + i] |= swiz;
+
                r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0;
                if (InputsRead & (FRAG_BIT_TEX0 << i)) {
 
@@ -1756,15 +1762,15 @@ static void r500SetupRSUnit(GLcontext * ctx)
 
                /* with TCL we always seem to route 4 components */
                if (InputsRead & (FRAG_BIT_TEX0 << i)) {
-                 
+
                  if (hw_tcl_on)
                    count = 4;
                  else
                    count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size;
-                 
+
                  /* always have on texcoord */
                  swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_S_SHIFT;
-                 if (count >= 2) 
+                 if (count >= 2)
                    swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_T_SHIFT;
                  else
                    swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT;
@@ -1774,11 +1780,11 @@ static void r500SetupRSUnit(GLcontext * ctx)
                  else
                    swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT;
 
-                 if (count == 4) 
+                 if (count == 4)
                    swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_Q_SHIFT;
                  else
                    swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
-                 
+
                } else
                   swiz = (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
                          (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
@@ -1920,7 +1926,7 @@ static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count,
 
     R300_STATECHANGE(rmesa, vap_cntl);
     if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
-       rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] = 
+       rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] =
            (pvs_num_slots << R300_PVS_NUM_SLOTS_SHIFT) |
            (pvs_num_cntrls << R300_PVS_NUM_CNTLRS_SHIFT) |
            (12 << R300_VF_MAX_VTX_NUM_SHIFT);
@@ -2640,11 +2646,11 @@ void r300UpdateClipPlanes( GLcontext *ctx )
 {
        r300ContextPtr rmesa = R300_CONTEXT(ctx);
        GLuint p;
-       
+
        for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
                if (ctx->Transform.ClipPlanesEnabled & (1 << p)) {
                        GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p];
-                       
+
                        R300_STATECHANGE( rmesa, vpucp[p] );
                        rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0];
                        rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1];