r300/compiler: Correctly calculate the max number of iterations for loops.
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_program_tex.c
index 655e84b4cb8dc44353038366e24b89be5d20eb06..9c4b65f4c00c12ce0f91b801371f43f4427e3c79 100644 (file)
 
 #include "radeon_program_tex.h"
 
-#include "../r300_reg.h"
-
 /* Series of transformations to be done on textures. */
 
-static struct rc_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
+static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler,
+                                                                                        int tmu)
 {
        struct rc_src_register reg = { 0, };
 
-       reg.File = RC_FILE_CONSTANT;
-       reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu);
-       reg.Swizzle = RC_SWIZZLE_WWWW;
+       if (compiler->enable_shadow_ambient) {
+               reg.File = RC_FILE_CONSTANT;
+               reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants,
+                                                                                  RC_STATE_SHADOW_AMBIENT, tmu);
+               reg.Swizzle = RC_SWIZZLE_WWWW;
+       } else {
+               reg.File = RC_FILE_NONE;
+               reg.Swizzle = RC_SWIZZLE_0000;
+       }
        return reg;
 }
 
+static void lower_texture_rect(struct r300_fragment_program_compiler *compiler,
+                                                          struct rc_instruction *inst)
+{
+       struct rc_instruction *inst_rect;
+       unsigned temp = rc_find_free_temporary(&compiler->Base);
+
+       if (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
+               compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords) {
+               inst_rect = rc_insert_new_instruction(&compiler->Base, inst->Prev);
+
+               inst_rect->U.I.Opcode = RC_OPCODE_MUL;
+               inst_rect->U.I.DstReg.File = RC_FILE_TEMPORARY;
+               inst_rect->U.I.DstReg.Index = temp;
+               inst_rect->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+               inst_rect->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+               inst_rect->U.I.SrcReg[1].Index =
+                               rc_constants_add_state(&compiler->Base.Program.Constants,
+                                                                          RC_STATE_R300_TEXRECT_FACTOR, inst->U.I.TexSrcUnit);
+
+               reset_srcreg(&inst->U.I.SrcReg[0]);
+               inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+               inst->U.I.SrcReg[0].Index = temp;
+
+               inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
+       }
+}
+
 /**
  * Transform TEX, TXP, TXB, and KIL instructions in the following ways:
  *  - implement texture compare (shadow extensions)
@@ -65,7 +97,8 @@ int radeonTransformTEX(
 
        /* ARB_shadow & EXT_shadow_funcs */
        if (inst->U.I.Opcode != RC_OPCODE_KIL &&
-               c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) {
+               ((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) ||
+                (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) {
                rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
 
                if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
@@ -75,58 +108,81 @@ int radeonTransformTEX(
                                inst->U.I.SrcReg[0].File = RC_FILE_NONE;
                                inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
                        } else {
-                               inst->U.I.SrcReg[0] = shadow_ambient(c, inst->U.I.TexSrcUnit);
+                               inst->U.I.SrcReg[0] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
                        }
 
                        return 1;
                } else {
                        rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
-                       unsigned int depthmode = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_mode;
-                       struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
-                       struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
-                       struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
-                       int pass, fail;
-
-                       inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
-                       inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
-                       inst_rcp->U.I.DstReg.Index = rc_find_free_temporary(c);
-                       inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
-                       inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
-                       inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
-
-                       inst_cmp->U.I.DstReg = inst->U.I.DstReg;
+                       struct rc_instruction * inst_rcp = NULL;
+                       struct rc_instruction * inst_mad;
+                       struct rc_instruction * inst_cmp;
+                       unsigned tmp_texsample = rc_find_free_temporary(c);
+                       unsigned tmp_sum = rc_find_free_temporary(c);
+                       unsigned tmp_recip_w = 0;
+                       int pass, fail, tex;
+
+                       /* Save the output register. */
+                       struct rc_dst_register output_reg = inst->U.I.DstReg;
+
+                       /* Redirect TEX to a new temp. */
                        inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
-                       inst->U.I.DstReg.Index = rc_find_free_temporary(c);
+                       inst->U.I.DstReg.Index = tmp_texsample;
                        inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
 
-                       inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+                       if (inst->U.I.Opcode == RC_OPCODE_TXP) {
+                               tmp_recip_w = rc_find_free_temporary(c);
+
+                               /* Compute 1/W. */
+                               inst_rcp = rc_insert_new_instruction(c, inst);
+                               inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+                               inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+                               inst_rcp->U.I.DstReg.Index = tmp_recip_w;
+                               inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+                               inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+                               inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+                       }
+
+                       /* Perspective-divide r by W (if it's TXP) and add the texture sample (see below). */
+                       inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
                        inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
-                       inst_mad->U.I.DstReg.Index = rc_find_free_temporary(c);
+                       inst_mad->U.I.DstReg.Index = tmp_sum;
                        inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
                        inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ;
-                       inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
-                       inst_mad->U.I.SrcReg[1].Index = inst_rcp->U.I.DstReg.Index;
-                       inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
-                       inst_mad->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
-                       inst_mad->U.I.SrcReg[2].Index = inst->U.I.DstReg.Index;
-                       if (depthmode == 0) /* GL_LUMINANCE */
-                               inst_mad->U.I.SrcReg[2].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z);
-                       else if (depthmode == 2) /* GL_ALPHA */
-                               inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_WWWW;
-
-                       /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
-                        *   r  < tex  <=>      -tex+r < 0
-                        *   r >= tex  <=> not (-tex+r < 0 */
+                       if (inst->U.I.Opcode == RC_OPCODE_TXP) {
+                               inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+                               inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+                               inst_mad->U.I.SrcReg[1].Index = tmp_recip_w;
+                               inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+                               tex = 2;
+                       } else {
+                               inst_mad->U.I.Opcode = RC_OPCODE_ADD;
+                               tex = 1;
+                       }
+                       inst_mad->U.I.SrcReg[tex].File = RC_FILE_TEMPORARY;
+                       inst_mad->U.I.SrcReg[tex].Index = tmp_texsample;
+                       inst_mad->U.I.SrcReg[tex].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_swizzle;
+
+                       /* Fake EQUAL/NOTEQUAL, it seems to pass some tests suprisingly. */
+                       if (comparefunc == RC_COMPARE_FUNC_EQUAL) {
+                               comparefunc = RC_COMPARE_FUNC_GEQUAL;
+                       } else if (comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {
+                               comparefunc = RC_COMPARE_FUNC_LESS;
+                       }
+
+                       /* Recall that SrcReg[0] is r, SrcReg[tex] is tex and:
+                        *   LESS:    r  < tex  <=>      -tex+r < 0
+                        *   GEQUAL:  r >= tex  <=> not (-tex+r < 0)
+                        *   GREATER: r  > tex  <=>       tex-r < 0
+                        *   LEQUAL:  r <= tex  <=> not ( tex-r < 0)
+                        *
+                        * This negates either r or tex: */
                        if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL)
-                               inst_mad->U.I.SrcReg[2].Negate = inst_mad->U.I.SrcReg[2].Negate ^ RC_MASK_XYZW;
+                               inst_mad->U.I.SrcReg[tex].Negate = inst_mad->U.I.SrcReg[tex].Negate ^ RC_MASK_XYZW;
                        else
                                inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
 
-                       inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
-                       /* DstReg has been filled out above */
-                       inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
-                       inst_cmp->U.I.SrcReg[0].Index = inst_mad->U.I.DstReg.Index;
-
+                       /* This negates the whole expresion: */
                        if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) {
                                pass = 1;
                                fail = 2;
@@ -135,9 +191,14 @@ int radeonTransformTEX(
                                fail = 1;
                        }
 
+                       inst_cmp = rc_insert_new_instruction(c, inst_mad);
+                       inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
+                       inst_cmp->U.I.DstReg = output_reg;
+                       inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+                       inst_cmp->U.I.SrcReg[0].Index = tmp_sum;
                        inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;
                        inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;
-                       inst_cmp->U.I.SrcReg[fail] = shadow_ambient(c, inst->U.I.TexSrcUnit);
+                       inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
                }
        }
 
@@ -157,51 +218,38 @@ int radeonTransformTEX(
         *
         * Mirroring is trickier. We're going to start out like repeat:
         *
-        * MUL temp0, texcoord, <scaling factor constant> ; De-mirror across axes
-        * MUL temp0, abs(temp0), 0.5 ; Pattern repeats in [0, 2]
+        * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes
+        * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2]
         *                            ; so scale to [0, 1]
-        * FRC temp0, temp0 ; Make the pattern repeat
-        * SGE temp1, temp0, 0.5 ; Select components that need to be "reflected"
-        *                       ; across the mirror
-        * MAD temp0, neg(0.5), temp1, temp0 ; Add -0.5 to the
-        *                                   ; selected components
-        * ADD temp0, temp0, temp0 ; Poor man's 2x to undo earlier MUL
+        * FRC temp, temp ; Make the pattern repeat
+        * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1]
+        * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern.
+        *                              ; The pattern is backwards, so reverse it (1-x).
         *
         * This gives us coords in [0, 1].
         *
-        * ~ C.
+        * ~ C & M. ;)
         */
        if (inst->U.I.Opcode != RC_OPCODE_KIL &&
                (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
                        compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot ||
                        compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords)) {
                rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;
-               struct rc_instruction *inst_rect = NULL;
-               unsigned temp = rc_find_free_temporary(c);
-
-               if (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
-                       compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords) {
-                       inst_rect = rc_insert_new_instruction(c, inst->Prev);
-
-                       inst_rect->U.I.Opcode = RC_OPCODE_MUL;
-                       inst_rect->U.I.DstReg.File = RC_FILE_TEMPORARY;
-                       inst_rect->U.I.DstReg.Index = temp;
-                       inst_rect->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
-                       inst_rect->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
-                       inst_rect->U.I.SrcReg[1].Index =
-                               rc_constants_add_state(&c->Program.Constants,
-                                       RC_STATE_R300_TEXRECT_FACTOR, inst->U.I.TexSrcUnit);
-
-                       reset_srcreg(&inst->U.I.SrcReg[0]);
-                       inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
-                       inst->U.I.SrcReg[0].Index = temp;
 
-                       inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
+               /* R300 cannot sample from rectangles. */
+               if (!c->is_r500) {
+                       lower_texture_rect(compiler, inst);
                }
 
                if (compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot &&
                        wrapmode != RC_WRAP_NONE) {
                        struct rc_instruction *inst_mov;
+                       unsigned temp = rc_find_free_temporary(c);
+
+                       /* For NPOT fallback, we need normalized coordinates anyway. */
+                       if (c->is_r500) {
+                               lower_texture_rect(compiler, inst);
+                       }
 
                        if (wrapmode == RC_WRAP_REPEAT) {
                                /* Both instructions will be paired up. */
@@ -212,63 +260,84 @@ int radeonTransformTEX(
                                inst_frc->U.I.DstReg.Index = temp;
                                inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
                                inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
-                       } else if (wrapmode == RC_WRAP_MIRROR) {
-                               unsigned temp1;
+                       } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
                                /*
-                                * MUL temp0, abs(temp0), 0.5
-                                * FRC temp0, temp0
-                                * SGE temp1, temp0, 0.5
-                                * MAD temp0, neg(0.5), temp1, temp0
-                                * ADD temp0, temp0, temp0
+                                * Function:
+                                *   f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
+                                *
+                                * Code:
+                                *   MUL temp, src0, 0.5
+                                *   FRC temp, temp
+                                *   MAD temp, temp, 2, -1
+                                *   ADD temp, 1, -abs(temp)
                                 */
 
-                               inst_rect = rc_insert_new_instruction(c, inst->Prev);
-
-                               inst_rect->U.I.Opcode = RC_OPCODE_MUL;
-                               inst_rect->U.I.DstReg.File = RC_FILE_TEMPORARY;
-                               inst_rect->U.I.DstReg.Index = temp;
-                               inst_rect->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
-                               inst_rect->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF);
-
-                               inst_rect = rc_insert_new_instruction(c, inst->Prev);
-
-                               inst_rect->U.I.Opcode = RC_OPCODE_FRC;
-                               inst_rect->U.I.DstReg.File = RC_FILE_TEMPORARY;
-                               inst_rect->U.I.DstReg.Index = temp;
-                               inst_rect->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
-                               inst_rect->U.I.SrcReg[0].Index = temp;
-
-                               temp1 = rc_find_free_temporary(c);
-                               inst_rect = rc_insert_new_instruction(c, inst->Prev);
-
-                               inst_rect->U.I.Opcode = RC_OPCODE_SGE;
-                               inst_rect->U.I.DstReg.File = RC_FILE_TEMPORARY;
-                               inst_rect->U.I.DstReg.Index = temp1;
-                               inst_rect->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
-                               inst_rect->U.I.SrcReg[0].Index = temp;
-                               inst_rect->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF);
-
-                               inst_rect = rc_insert_new_instruction(c, inst->Prev);
-
-                               inst_rect->U.I.Opcode = RC_OPCODE_MAD;
-                               inst_rect->U.I.DstReg.File = RC_FILE_TEMPORARY;
-                               inst_rect->U.I.DstReg.Index = temp;
-                               inst_rect->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
-                               inst_rect->U.I.SrcReg[0].Index = temp1;
-                               inst_rect->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF);
-                               inst_rect->U.I.SrcReg[1].Negate = 1;
-                               inst_rect->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
-                               inst_rect->U.I.SrcReg[2].Index = temp;
-
-                               inst_rect = rc_insert_new_instruction(c, inst->Prev);
-
-                               inst_rect->U.I.Opcode = RC_OPCODE_ADD;
-                               inst_rect->U.I.DstReg.File = RC_FILE_TEMPORARY;
-                               inst_rect->U.I.DstReg.Index = temp;
-                               inst_rect->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
-                               inst_rect->U.I.SrcReg[0].Index = temp;
-                               inst_rect->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
-                               inst_rect->U.I.SrcReg[1].Index = temp;
+                               struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
+                               unsigned two, two_swizzle;
+
+                               inst_mul = rc_insert_new_instruction(c, inst->Prev);
+
+                               inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+                               inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+                               inst_mul->U.I.DstReg.Index = temp;
+                               inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+                               inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+                               inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;
+
+                               inst_frc = rc_insert_new_instruction(c, inst->Prev);
+
+                               inst_frc->U.I.Opcode = RC_OPCODE_FRC;
+                               inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
+                               inst_frc->U.I.DstReg.Index = temp;
+                               inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+                               inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+                               inst_frc->U.I.SrcReg[0].Index = temp;
+                               inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+
+                               two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
+                               inst_mad = rc_insert_new_instruction(c, inst->Prev);
+
+                               inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+                               inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+                               inst_mad->U.I.DstReg.Index = temp;
+                               inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+                               inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+                               inst_mad->U.I.SrcReg[0].Index = temp;
+                               inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+                               inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+                               inst_mad->U.I.SrcReg[1].Index = two;
+                               inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
+                               inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
+                               inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;
+
+                               inst_add = rc_insert_new_instruction(c, inst->Prev);
+
+                               inst_add->U.I.Opcode = RC_OPCODE_ADD;
+                               inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+                               inst_add->U.I.DstReg.Index = temp;
+                               inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+                               inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
+                               inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+                               inst_add->U.I.SrcReg[1].Index = temp;
+                               inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
+                               inst_add->U.I.SrcReg[1].Abs = 1;
+                               inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
+                       } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
+                               /*
+                                * Mirrored clamp modes are bloody simple, we just use abs
+                                * to mirror [0, 1] into [-1, 0]. This works for
+                                * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.
+                                */
+                               struct rc_instruction *inst_mov;
+
+                               inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+                               inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+                               inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+                               inst_mov->U.I.DstReg.Index = temp;
+                               inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+                               inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+                               inst_mov->U.I.SrcReg[0].Abs = 1;
                        }
 
                        /* Preserve W for TXP/TXB. */
@@ -286,9 +355,10 @@ int radeonTransformTEX(
                }
        }
 
-       /* Cannot write texture to output registers or with masks */
+       /* Cannot write texture to output registers (all chips) or with masks (non-r500) */
        if (inst->U.I.Opcode != RC_OPCODE_KIL &&
-               (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || inst->U.I.DstReg.WriteMask != RC_MASK_XYZW)) {
+               (inst->U.I.DstReg.File != RC_FILE_TEMPORARY ||
+                (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {
                struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
 
                inst_mov->U.I.Opcode = RC_OPCODE_MOV;