r300/compiler: Fix vertex program MAD emit
authorNicolai Hähnle <nhaehnle@gmail.com>
Wed, 26 Aug 2009 21:31:37 +0000 (23:31 +0200)
committerNicolai Hähnle <nhaehnle@gmail.com>
Wed, 26 Aug 2009 23:46:50 +0000 (01:46 +0200)
Only use the macro variant of MAD when absolutely necessary.
Apparently it cannot deal with relative addressing.

Signed-off-by: Nicolai Hähnle <nhaehnle@gmail.com>
src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c

index fc9c8f805ae0091751065bd4cc52d2356e385d10..6c1a03868911d80e5dfc915ca860e3b09c49db3e 100644 (file)
@@ -284,12 +284,52 @@ static void ei_mad(struct r300_vertex_program_code *vp,
                                      struct prog_instruction *vpi,
                                      GLuint * inst)
 {
-       inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
-                                    GL_FALSE,
-                                    GL_TRUE,
-                                    t_dst_index(vp, &vpi->DstReg),
-                                    t_dst_mask(vpi->DstReg.WriteMask),
-                                    t_dst_class(vpi->DstReg.File));
+       /* Remarks about hardware limitations of MAD
+        * (please preserve this comment, as this information is _NOT_
+        * in the documentation provided by AMD).
+        *
+        * As described in the documentation, MAD with three unique temporary
+        * source registers requires the use of the macro version.
+        *
+        * However (and this is not mentioned in the documentation), apparently
+        * the macro version is _NOT_ a full superset of the normal version.
+        * In particular, the macro version does not always work when relative
+        * addressing is used in the source operands.
+        *
+        * This limitation caused incorrect rendering in Sauerbraten's OpenGL
+        * assembly shader path when using medium quality animations
+        * (i.e. animations with matrix blending instead of quaternion blending).
+        *
+        * Unfortunately, I (nha) have been unable to extract a Piglit regression
+        * test for this issue - for some reason, it is possible to have vertex
+        * programs whose prefix is *exactly* the same as the prefix of the
+        * offending program in Sauerbraten up to the offending instruction
+        * without causing any trouble.
+        *
+        * Bottom line: Only use the macro version only when really necessary;
+        * according to AMD docs, this should improve performance by one clock
+        * as a nice side bonus.
+        */
+       if (vpi->SrcReg[0].File == PROGRAM_TEMPORARY &&
+           vpi->SrcReg[1].File == PROGRAM_TEMPORARY &&
+           vpi->SrcReg[2].File == PROGRAM_TEMPORARY &&
+           vpi->SrcReg[0].Index != vpi->SrcReg[1].Index &&
+           vpi->SrcReg[0].Index != vpi->SrcReg[2].Index &&
+           vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) {
+               inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
+                               GL_FALSE,
+                               GL_TRUE,
+                               t_dst_index(vp, &vpi->DstReg),
+                               t_dst_mask(vpi->DstReg.WriteMask),
+                               t_dst_class(vpi->DstReg.File));
+       } else {
+               inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+                               GL_FALSE,
+                               GL_FALSE,
+                               t_dst_index(vp, &vpi->DstReg),
+                               t_dst_mask(vpi->DstReg.WriteMask),
+                               t_dst_class(vpi->DstReg.File));
+       }
        inst[1] = t_src(vp, &vpi->SrcReg[0]);
        inst[2] = t_src(vp, &vpi->SrcReg[1]);
        inst[3] = t_src(vp, &vpi->SrcReg[2]);