r500: Handle non-native swizzles in texture instructions
author: Nicolai Haehnle <nhaehnle@gmail.com>
Sun, 27 Jul 2008 14:36:05 +0000 (16:36 +0200)
committer: Nicolai Haehnle <nhaehnle@gmail.com>
Sun, 27 Jul 2008 14:48:24 +0000 (16:48 +0200)
This fixes piglit's fp-kil and fp-generic/kil-swizzle tests.

src/mesa/drivers/dri/r300/r500_fragprog.c
src/mesa/drivers/dri/r300/radeon_program_pair.c

index c78deab2ace68dfcad9dce19128afe15b9ca37bc..3fbdb30acff45be434f21c761629829dc64f2a5c 100644 (file)
@@ -269,44 +269,87 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg)
        GLuint relevant;
        int i;
 
-       if (reg.Abs)
+       if (opcode == OPCODE_TEX ||
+           opcode == OPCODE_TXB ||
+           opcode == OPCODE_TXP ||
+           opcode == OPCODE_KIL) {
+               if (reg.Abs)
+                       return GL_FALSE;
+
+               if (reg.NegateAbs)
+                       reg.NegateBase ^= 15;
+
+               if (opcode == OPCODE_KIL) {
+                       if (reg.Swizzle != SWIZZLE_NOOP)
+                               return GL_FALSE;
+               } else {
+                       for(i = 0; i < 4; ++i) {
+                               GLuint swz = GET_SWZ(reg.Swizzle, i);
+                               if (swz == SWIZZLE_NIL) {
+                                       reg.NegateBase &= ~(1 << i);
+                                       continue;
+                               }
+                               if (swz >= 4)
+                                       return GL_FALSE;
+                       }
+               }
+
+               if (reg.NegateBase)
+                       return GL_FALSE;
+
                return GL_TRUE;
+       } else {
+               /* ALU instructions support almost everything */
+               if (reg.Abs)
+                       return GL_TRUE;
 
-       relevant = 0;
-       for(i = 0; i < 3; ++i) {
-               GLuint swz = GET_SWZ(reg.Swizzle, i);
-               if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO)
-                       relevant |= 1 << i;
-       }
-       if ((reg.NegateBase & relevant) && ((reg.NegateBase & relevant) != relevant))
-               return GL_FALSE;
+               relevant = 0;
+               for(i = 0; i < 3; ++i) {
+                       GLuint swz = GET_SWZ(reg.Swizzle, i);
+                       if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO)
+                               relevant |= 1 << i;
+               }
+               if ((reg.NegateBase & relevant) && ((reg.NegateBase & relevant) != relevant))
+                       return GL_FALSE;
 
-       return GL_TRUE;
+               return GL_TRUE;
+       }
 }
 
 /**
- * Implement a non-native swizzle. This function assumes that
- * is_native_swizzle returned true.
+ * Implement a MOV with a potentially non-native swizzle.
+ *
+ * The only thing we *cannot* do in an ALU instruction is per-component
+ * negation. Therefore, we split the MOV into two instructions when necessary.
  */
 static void nqssadce_build_swizzle(struct nqssadce_state *s,
        struct prog_dst_register dst, struct prog_src_register src)
 {
        struct prog_instruction *inst;
+       GLuint negatebase[2] = { 0, 0 };
+       int i;
 
-       _mesa_insert_instructions(s->Program, s->IP, 2);
-       inst = s->Program->Instructions + s->IP;
+       for(i = 0; i < 4; ++i) {
+               GLuint swz = GET_SWZ(src.Swizzle, i);
+               if (swz == SWIZZLE_NIL)
+                       continue;
+               negatebase[GET_BIT(src.NegateBase, i)] |= 1 << i;
+       }
 
-       inst[0].Opcode = OPCODE_MOV;
-       inst[0].DstReg = dst;
-       inst[0].DstReg.WriteMask &= src.NegateBase;
-       inst[0].SrcReg[0] = src;
+       _mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0));
+       inst = s->Program->Instructions + s->IP;
 
-       inst[1].Opcode = OPCODE_MOV;
-       inst[1].DstReg = dst;
-       inst[1].DstReg.WriteMask &= ~src.NegateBase;
-       inst[1].SrcReg[0] = src;
+       for(i = 0; i <= 1; ++i) {
+               if (!negatebase[i])
+                       continue;
 
-       s->IP += 2;
+               inst->Opcode = OPCODE_MOV;
+               inst->DstReg = dst;
+               inst->DstReg.WriteMask = negatebase[i];
+               inst->SrcReg[0] = src;
+               inst++;
+               s->IP++;
+       }
 }
 
 static GLuint build_dtm(GLuint depthmode)
index 8762422801eb03701039a3b56dc4c2158232a80b..4307994d74c0b13ee54690803e0d335d5374d92c 100644 (file)
@@ -265,11 +265,21 @@ static void final_rewrite(struct pair_state *s, struct prog_instruction *inst)
                inst->SrcReg[0] = tmp;
                break;
        case OPCODE_MOV:
-               inst->SrcReg[1] = inst->SrcReg[0];
+               /* AMD say we should use CMP.
+                * However, when we transform
+                *  KIL -r0;
+                * into
+                *  CMP tmp, -r0, -r0, 0;
+                *  KIL tmp;
+                * we get incorrect behaviour on R500 when r0 == 0.0.
+                * It appears that the R500 KIL hardware treats -0.0 as less
+                * than zero.
+                */
+               inst->SrcReg[1].File = PROGRAM_BUILTIN;
+               inst->SrcReg[1].Swizzle = SWIZZLE_1111;
                inst->SrcReg[2].File = PROGRAM_BUILTIN;
                inst->SrcReg[2].Swizzle = SWIZZLE_0000;
-               inst->Opcode = OPCODE_CMP;
-               // TODO: disable output modifiers on R500
+               inst->Opcode = OPCODE_MAD;
                break;
        case OPCODE_MUL:
                inst->SrcReg[2].File = PROGRAM_BUILTIN;