r300/compiler: Enable presubtract sources
author Tom Stellard <tstellar@gmail.com>
Wed, 14 Jul 2010 04:25:27 +0000 (21:25 -0700)
committer Tom Stellard <tstellar@gmail.com>
Sat, 11 Sep 2010 01:18:08 +0000 (18:18 -0700)
The r300 compiler can now emit instructions that select from the presubtract
source.  A peephole optimization has been added to convert instructions like:
ADD Temp[0].x, none.1, -Temp[1].x into the INV (1 - src0) presubtract
operation.

17 files changed:
src/gallium/drivers/r300/r300_fs.c
src/gallium/drivers/r300/r300_vs.c
src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
src/mesa/drivers/dri/r300/compiler/radeon_program.h
src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c

index b9f4d77dea7e5207244390f90a4227660ce8d81e..b8dab88ef09f0f5b50a0e75a4c7f076eaa81bc15 100644 (file)
@@ -387,6 +387,7 @@ static void r300_translate_fragment_shader(
     compiler.Base.is_r500 = r300->screen->caps.is_r500;
     compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
     compiler.Base.has_half_swizzles = TRUE;
+    compiler.Base.has_presub = TRUE;
     compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32;
     compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32;
     compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64;
index 22c94adbda0c8e6294451bd66475a950661c90f5..e2b9af9d018ce1a877b3687f06562288e0f91b74 100644 (file)
@@ -208,6 +208,7 @@ void r300_translate_vertex_shader(struct r300_context *r300,
     compiler.Base.is_r500 = r300->screen->caps.is_r500;
     compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
     compiler.Base.has_half_swizzles = FALSE;
+    compiler.Base.has_presub = FALSE;
     compiler.Base.max_temp_regs = 32;
     compiler.Base.max_constants = 256;
     compiler.Base.max_alu_insts = r300->screen->caps.is_r500 ? 1024 : 256;
index 3c83deffcb56c6866d689334cb77a44b74e62592..782671bac0156810f89448c73eeb997ec8beb824 100644 (file)
 
 #include "../r300_reg.h"
 
+static void presub_string(char out[10], unsigned int inst)
+{      /* Writes a short mnemonic ("bias", "sub", "add" or "inv ") into out
+        * for the presubtract operation selected by the bits of inst under
+        * the mask 0x600000.  Note that "inv " carries a trailing space.
+        * There is no default case, so out is left untouched when none of
+        * the R300_ALU_SRCP_* values matches.
+        * NOTE(review): presumably the four cases cover every value of this
+        * two-bit field -- confirm against r300_reg.h. */
+       switch(inst & 0x600000){
+       case R300_ALU_SRCP_1_MINUS_2_SRC0:
+               sprintf(out, "bias");
+               break;
+       case R300_ALU_SRCP_SRC1_MINUS_SRC0:
+               sprintf(out, "sub");
+               break;
+       case R300_ALU_SRCP_SRC1_PLUS_SRC0:
+               sprintf(out, "add");
+               break;
+       case R300_ALU_SRCP_1_MINUS_SRC0:
+               sprintf(out, "inv ");
+               break;
+       }
+}
+
 /* just some random things... */
 void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
 {
@@ -98,8 +116,8 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
 
                for (i = alu_offset;
                     i <= alu_offset + alu_end; ++i) {
-                       char srcc[3][10], dstc[20];
-                       char srca[3][10], dsta[20];
+                       char srcc[4][10], dstc[20];
+                       char srca[4][10], dsta[20];
                        char argc[3][20];
                        char arga[3][20];
                        char flags[5], tmp[10];
@@ -142,6 +160,9 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
                                        flags);
                                strcat(dstc, tmp);
                        }
+                       /* Presub */
+                       presub_string(srcc[3], code->alu.inst[i].rgb_inst);
+                       presub_string(srca[3], code->alu.inst[i].alpha_inst);
 
                        dsta[0] = 0;
                        if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
@@ -160,11 +181,12 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
                        }
 
                        fprintf(stderr,
-                               "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n"
-                               "       w: %3s %3s %3s -> %-20s (%08x)\n", i,
-                               srcc[0], srcc[1], srcc[2], dstc,
+                               "%3i: xyz: %3s %3s %3s %5s-> %-20s (%08x)\n"
+                               "       w: %3s %3s %3s %5s-> %-20s (%08x)\n", i,
+                               srcc[0], srcc[1], srcc[2], srcc[3], dstc,
                                code->alu.inst[i].rgb_addr, srca[0], srca[1],
-                               srca[2], dsta, code->alu.inst[i].alpha_addr);
+                               srca[2], srca[3], dsta,
+                               code->alu.inst[i].alpha_addr);
 
                        for (j = 0; j < 3; ++j) {
                                int regc = code->alu.inst[i].rgb_inst >> (j * 7);
@@ -194,6 +216,24 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
                                        }
                                } else if (d < 15) {
                                        sprintf(buf, "%s.www", srca[d - 12]);
+                               } else if (d < 20 ) {
+                                       switch(d) {
+                                       case R300_ALU_ARGC_SRCP_XYZ:
+                                               sprintf(buf, "srcp.xyz");
+                                               break;
+                                       case R300_ALU_ARGC_SRCP_XXX:
+                                               sprintf(buf, "srcp.xxx");
+                                               break;
+                                       case R300_ALU_ARGC_SRCP_YYY:
+                                               sprintf(buf, "srcp.yyy");
+                                               break;
+                                       case R300_ALU_ARGC_SRCP_ZZZ:
+                                               sprintf(buf, "srcp.zzz");
+                                               break;
+                                       case R300_ALU_ARGC_SRCP_WWW:
+                                               sprintf(buf, "srcp.www");
+                                               break;
+                                       }
                                } else if (d == 20) {
                                        sprintf(buf, "0.0");
                                } else if (d == 21) {
@@ -231,6 +271,21 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
                                                'x' + (char)(d % 3));
                                } else if (d < 12) {
                                        sprintf(buf, "%s.w", srca[d - 9]);
+                               } else if (d < 16) {
+                                       switch(d) {
+                                       case R300_ALU_ARGA_SRCP_X:
+                                               sprintf(buf, "srcp.x");
+                                               break;
+                                       case R300_ALU_ARGA_SRCP_Y:
+                                               sprintf(buf, "srcp.y");
+                                               break;
+                                       case R300_ALU_ARGA_SRCP_Z:
+                                               sprintf(buf, "srcp.z");
+                                               break;
+                                       case R300_ALU_ARGA_SRCP_W:
+                                               sprintf(buf, "srcp.w");
+                                               break;
+                                       }
                                } else if (d == 16) {
                                        sprintf(buf, "0.0");
                                } else if (d == 17) {
@@ -247,11 +302,14 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
                                        buf, (rega & 64) ? "|" : "");
                        }
 
-                       fprintf(stderr, "     xyz: %8s %8s %8s    op: %08x\n"
+                       fprintf(stderr, "     xyz: %8s %8s %8s    op: %08x %s\n"
                                "       w: %8s %8s %8s    op: %08x\n",
                                argc[0], argc[1], argc[2],
-                               code->alu.inst[i].rgb_inst, arga[0], arga[1],
-                               arga[2], code->alu.inst[i].alpha_inst);
+                               code->alu.inst[i].rgb_inst,
+                               code->alu.inst[i].rgb_inst & R300_ALU_INSERT_NOP ?
+                               "NOP" : "",
+                               arga[0], arga[1],arga[2],
+                               code->alu.inst[i].alpha_inst);
                }
        }
 }
index eead2ea4260cc2a53fa71e74c69683d205439dcd..3b2b06fc2bd0e1ce2922a78744d4265140c15132 100644 (file)
@@ -164,6 +164,53 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
                code->alu.inst[ip].alpha_inst |= arg << (7*j);
        }
 
+       /* Presubtract */
+       if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+               switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
+               case RC_PRESUB_BIAS:
+                       code->alu.inst[ip].rgb_inst |=
+                                               R300_ALU_SRCP_1_MINUS_2_SRC0;
+                       break;
+               case RC_PRESUB_ADD:
+                       code->alu.inst[ip].rgb_inst |=
+                                               R300_ALU_SRCP_SRC1_PLUS_SRC0;
+                       break;
+               case RC_PRESUB_SUB:
+                       code->alu.inst[ip].rgb_inst |=
+                                               R300_ALU_SRCP_SRC1_MINUS_SRC0;
+                       break;
+               case RC_PRESUB_INV:
+                       code->alu.inst[ip].rgb_inst |=
+                                               R300_ALU_SRCP_1_MINUS_SRC0;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
+               switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
+               case RC_PRESUB_BIAS:
+                       code->alu.inst[ip].alpha_inst |=
+                                               R300_ALU_SRCP_1_MINUS_2_SRC0;
+                       break;
+               case RC_PRESUB_ADD:
+                       code->alu.inst[ip].alpha_inst |=
+                                               R300_ALU_SRCP_SRC1_PLUS_SRC0;
+                       break;
+               case RC_PRESUB_SUB:
+                       code->alu.inst[ip].alpha_inst |=
+                                               R300_ALU_SRCP_SRC1_MINUS_SRC0;
+                       break;
+               case RC_PRESUB_INV:
+                       code->alu.inst[ip].alpha_inst |=
+                                               R300_ALU_SRCP_1_MINUS_SRC0;
+                       break;
+               default:
+                       break;
+               }
+       }
+
        if (inst->RGB.Saturate)
                code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
        if (inst->Alpha.Saturate)
@@ -198,6 +245,8 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
                emit->node_flags |= R300_W_OUT;
                c->code->writes_depth = 1;
        }
+       if (inst->Nop)
+               code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;
 
        return 1;
 }
index 5d5de2f1b2ab03186453fa346d3189b126574846..caa48fe478c47c143ff39a6a31991d8820bfe9d2 100644 (file)
@@ -44,25 +44,25 @@ struct swizzle_data {
        unsigned int hash; /**< swizzle value this matches */
        unsigned int base; /**< base value for hw swizzle */
        unsigned int stride; /**< difference in base between arg0/1/2 */
+       unsigned int srcp_stride; /**< difference in base between arg0/srcp */
 };
 
 static const struct swizzle_data native_swizzles[] = {
-       {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4},
-       {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4},
-       {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4},
-       {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4},
-       {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1},
-       {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1},
-       {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1},
-       {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1},
-       {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0},
-       {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0},
-       {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0}
+       {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, 15},
+       {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, 15},
+       {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, 15},
+       {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, 15},
+       {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, 7},
+       {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, 0},
+       {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, 0},
+       {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, 0},
+       {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0},
+       {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0},
+       {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0}
 };
 
 static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]);
 
-
 /**
  * Find a native RGB swizzle that matches the given swizzle.
  * Returns 0 if none found.
@@ -205,7 +205,11 @@ unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
                return 0;
        }
 
-       return sd->base + src*sd->stride;
+       if (src == RC_PAIR_PRESUB_SRC) {
+               return sd->base + sd->srcp_stride;
+       } else {
+               return sd->base + src*sd->stride;
+       }
 }
 
 
@@ -215,6 +219,9 @@ unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
  */
 unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle)
 {
+       if (src == RC_PAIR_PRESUB_SRC) {
+               return R300_ALU_ARGA_SRCP_X + swizzle;
+       }
        if (swizzle < 3)
                return swizzle + 3*src;
 
index 3220349f26cd96c66804dc42b7d2b512ceee0862..54cff9169a4d87de732056d46cb71ec36bd102c2 100644 (file)
@@ -260,6 +260,9 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
 
        code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
        code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
+       if (inst->Nop) {
+               code->inst[ip].inst0 |= R500_INST_NOP;
+       }
        if (inst->Alpha.DepthWriteMask) {
                code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
                c->code->writes_depth = 1;
@@ -275,6 +278,40 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair
        if (inst->Alpha.Saturate)
                code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
 
+       /* Set the presubtract operation. */
+       switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
+               case RC_PRESUB_BIAS:
+                       code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
+                       break;
+               case RC_PRESUB_SUB:
+                       code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
+                       break;
+               case RC_PRESUB_ADD:
+                       code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
+                       break;
+               case RC_PRESUB_INV:
+                       code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
+                       break;
+               default:
+                       break;
+       }
+       switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
+               case RC_PRESUB_BIAS:
+                       code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
+                       break;
+               case RC_PRESUB_SUB:
+                       code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
+                       break;
+               case RC_PRESUB_ADD:
+                       code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
+                       break;
+               case RC_PRESUB_INV:
+                       code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
+                       break;
+               default:
+                       break;
+       }
+
        code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
        code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
        code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
index 85c2e43ad672189ac19bee6d79bfb4ad9bb90170..6d96ac9fdd94cdd715d4dfced2be1225832c7703 100644 (file)
@@ -42,6 +42,7 @@ struct radeon_compiler {
        /* Hardware specification. */
        unsigned is_r500:1;
        unsigned has_half_swizzles:1;
+       unsigned has_presub:1;
        unsigned disable_optimizations:1;
        unsigned max_temp_regs;
        unsigned max_constants;
index 0e6c62541fa46f39bee641fef2d965f745fbe290..e73700f84a2aeef143d519c8a2d15a1cae4dda4c 100644 (file)
 
 #include "radeon_program.h"
 
+static void reads_normal_callback(
+       rc_read_write_chan_fn cb,
+       struct rc_instruction * fullinst,
+       struct rc_src_register src,
+       void * userdata)
+{      /* Reports the register read described by src to cb.
+        * The four swizzle selectors of src are folded into a bitmask that
+        * is then restricted to RC_MASK_XYZW.  When the resulting mask is
+        * non-zero, cb is invoked with src.File, src.Index and that mask;
+        * if src.RelAddr is also set, cb is invoked a second time for
+        * RC_FILE_ADDRESS index 0 with RC_MASK_X.
+        * userdata and fullinst are passed through to cb unchanged. */
+       unsigned int refmask = 0;
+       unsigned int chan;
+       for(chan = 0; chan < 4; chan++) {
+               refmask |= 1 << GET_SWZ(src.Swizzle, chan);
+       }
+       refmask &= RC_MASK_XYZW; /* presumably drops constant/unused selectors -- confirm */
+
+       if (refmask)
+               cb(userdata, fullinst, src.File, src.Index, refmask);
+
+       if (refmask && src.RelAddr)
+               cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X);
+}
 
 static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn cb, void * userdata)
 {
@@ -36,47 +55,81 @@ static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn
        const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
 
        for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
-               unsigned int refmask = 0;
 
                if (inst->SrcReg[src].File == RC_FILE_NONE)
                        return;
 
-               for(unsigned int chan = 0; chan < 4; ++chan)
-                       refmask |= 1 << GET_SWZ(inst->SrcReg[src].Swizzle, chan);
-
-               refmask &= RC_MASK_XYZW;
-
-               if (refmask)
-                       cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, refmask);
-
-               if (refmask && inst->SrcReg[src].RelAddr)
-                       cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X);
+               if (inst->SrcReg[src].File == RC_FILE_PRESUB) {
+                       unsigned int i;
+                       unsigned int srcp_regs = rc_presubtract_src_reg_count(
+                                                       inst->PreSub.Opcode);
+                       for( i = 0; i < srcp_regs; i++) {
+                               reads_normal_callback(cb, fullinst,
+                                               inst->PreSub.SrcReg[i],
+                                               userdata);
+                       }
+               } else {
+                       reads_normal_callback(cb, fullinst,
+                                               inst->SrcReg[src], userdata);
+               }
        }
 }
 
-static void reads_pair(struct rc_instruction * fullinst,  rc_read_write_mask_fn cb, void * userdata)
+static void pair_get_src_refmasks(unsigned int * refmasks,
+                                       struct rc_pair_instruction * inst,
+                                       unsigned int swz, unsigned int src)
 {
-       struct rc_pair_instruction * inst = &fullinst->U.P;
-       unsigned int refmasks[3] = { 0, 0, 0 };
-
-       if (inst->RGB.Opcode != RC_OPCODE_NOP) {
-               const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode);
+       if (swz >= 4)
+               return;
+
+       if (swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y || swz == RC_SWIZZLE_Z) {
+               if(src == RC_PAIR_PRESUB_SRC) {
+                       unsigned int i;
+                       int srcp_regs =
+                               rc_presubtract_src_reg_count(
+                               inst->RGB.Src[src].Index);
+                       for(i = 0; i < srcp_regs; i++) {
+                               refmasks[i] |= 1 << swz;
+                       }
+               }
+               else {
+                       refmasks[src] |= 1 << swz;
+               }
+       }
 
-               for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
-                       for(unsigned int chan = 0; chan < 3; ++chan) {
-                               unsigned int swz = GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan);
-                               if (swz < 4)
-                                       refmasks[inst->RGB.Arg[arg].Source] |= 1 << swz;
+       if (swz == RC_SWIZZLE_W) {
+               if (src == RC_PAIR_PRESUB_SRC) {
+                       unsigned int i;
+                       int srcp_regs = rc_presubtract_src_reg_count(
+                                       inst->Alpha.Src[src].Index);
+                       for(i = 0; i < srcp_regs; i++) {
+                               refmasks[i] |= 1 << swz;
                        }
                }
+               else {
+                       refmasks[src] |= 1 << swz;
+               }
        }
+}
 
-       if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
-               const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode);
+static void reads_pair(struct rc_instruction * fullinst,  rc_read_write_mask_fn cb, void * userdata)
+{
+       struct rc_pair_instruction * inst = &fullinst->U.P;
+       unsigned int refmasks[3] = { 0, 0, 0 };
 
-               for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
-                       if (inst->Alpha.Arg[arg].Swizzle < 4)
-                               refmasks[inst->Alpha.Arg[arg].Source] |= 1 << inst->Alpha.Arg[arg].Swizzle;
+       unsigned int arg;
+
+       for(arg = 0; arg < 3; ++arg) {
+               unsigned int chan;
+               for(chan = 0; chan < 3; ++chan) {
+                       unsigned int swz_rgb =
+                               GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan);
+                       unsigned int swz_alpha =
+                               GET_SWZ(inst->Alpha.Arg[arg].Swizzle, chan);
+                       pair_get_src_refmasks(refmasks, inst, swz_rgb,
+                                               inst->RGB.Arg[arg].Source);
+                       pair_get_src_refmasks(refmasks, inst, swz_alpha,
+                                               inst->Alpha.Arg[arg].Source);
                }
        }
 
@@ -212,10 +265,25 @@ static void remap_normal_instruction(struct rc_instruction * fullinst,
                rc_register_file file = inst->SrcReg[src].File;
                unsigned int index = inst->SrcReg[src].Index;
 
-               cb(userdata, fullinst, &file, &index);
+               if (file == RC_FILE_PRESUB) {
+                       unsigned int i;
+                       unsigned int srcp_srcs = rc_presubtract_src_reg_count(
+                                               inst->PreSub.Opcode);
+                       for(i = 0; i < srcp_srcs; i++) {
+                               file = inst->PreSub.SrcReg[i].File;
+                               index = inst->PreSub.SrcReg[i].Index;
+                               cb(userdata, fullinst, &file, &index);
+                               inst->PreSub.SrcReg[i].File = file;
+                               inst->PreSub.SrcReg[i].Index = index;
+                       }
 
-               inst->SrcReg[src].File = file;
-               inst->SrcReg[src].Index = index;
+               }
+               else {
+                       cb(userdata, fullinst, &file, &index);
+
+                       inst->SrcReg[src].File = file;
+                       inst->SrcReg[src].Index = index;
+               }
        }
 }
 
index 8e994671064155b899d62408ae5fec3dd50e2384..e01ba85aa56503f8cade8a6a5cf905e984033800 100644 (file)
 #include "radeon_compiler.h"
 #include "radeon_swizzle.h"
 
+struct peephole_state {
+       struct rc_instruction * Inst; /**< instruction whose destination register peephole_scan_write() tracks */
+       /** Bitmask of the components of Inst's destination register that are
+        * still "alive", i.e. that have not been written to since Inst was
+        * executed.  peephole_scan_write() clears the bits of components
+        * overwritten by a later write to the same register.
+        */
+       unsigned int WriteMask;
+};
 
 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
 {
@@ -54,7 +61,7 @@ static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct
        return combine;
 }
 
-struct peephole_state {
+struct copy_propagate_state {
        struct radeon_compiler * C;
        struct rc_instruction * Mov;
        unsigned int Conflict:1;
@@ -84,10 +91,10 @@ struct peephole_state {
  * @param index The index of the source register.
  * @param mask The components of the source register that are being read from.
  */
-static void peephole_scan_read(void * data, struct rc_instruction * inst,
+static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
                rc_register_file file, unsigned int index, unsigned int mask)
 {
-       struct peephole_state * s = data;
+       struct copy_propagate_state * s = data;
 
        /* XXX This could probably be handled better. */
        if (file == RC_FILE_ADDRESS) {
@@ -123,10 +130,10 @@ static void peephole_scan_read(void * data, struct rc_instruction * inst,
        }
 }
 
-static void peephole_scan_write(void * data, struct rc_instruction * inst,
+static void copy_propagate_scan_write(void * data, struct rc_instruction * inst,
                rc_register_file file, unsigned int index, unsigned int mask)
 {
-       struct peephole_state * s = data;
+       struct copy_propagate_state * s = data;
 
        if (s->BranchDepth < 0)
                return;
@@ -146,9 +153,9 @@ static void peephole_scan_write(void * data, struct rc_instruction * inst,
        }
 }
 
-static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mov)
+static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
 {
-       struct peephole_state s;
+       struct copy_propagate_state s;
 
        if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
            inst_mov->U.I.DstReg.RelAddr ||
@@ -170,14 +177,23 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo
        for(struct rc_instruction * inst = inst_mov->Next;
            inst != &c->Program.Instructions;
            inst = inst->Next) {
+               const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
                /* XXX In the future we might be able to make the optimizer
                 * smart enough to handle loops. */
                if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP
                                || inst->U.I.Opcode == RC_OPCODE_ENDLOOP){
                        return;
                }
-               rc_for_all_reads_mask(inst, peephole_scan_read, &s);
-               rc_for_all_writes_mask(inst, peephole_scan_write, &s);
+
+               /* It is possible to do copy propagation in this situation,
+                * just not right now, see peephole_add_presub_inv() */
+               if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE &&
+                                               info->NumSrcRegs > 2) {
+                       return;
+               }
+
+               rc_for_all_reads_mask(inst, copy_propagate_scan_read, &s);
+               rc_for_all_writes_mask(inst, copy_propagate_scan_write, &s);
                if (s.Conflict)
                        return;
 
@@ -206,7 +222,6 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo
            inst != &c->Program.Instructions;
            inst = inst->Next) {
                const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
-
                for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
                        if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY &&
                            inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) {
@@ -217,8 +232,11 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo
                                        refmask |= (1 << swz) & RC_MASK_XYZW;
                                }
 
-                               if ((refmask & s.MovMask) == refmask)
+                               if ((refmask & s.MovMask) == refmask) {
                                        inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]);
+                                       if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
+                                               inst->U.I.PreSub = s.Mov->U.I.PreSub;
+                               }
                        }
                }
 
@@ -283,7 +301,6 @@ static int is_src_uniform_constant(struct rc_src_register src,
        return 1;
 }
 
-
 static void constant_folding_mad(struct rc_instruction * inst)
 {
        rc_swizzle swz;
@@ -379,7 +396,6 @@ static void constant_folding_add(struct rc_instruction * inst)
        }
 }
 
-
 /**
  * Replace 0.0, 1.0 and 0.5 immediate constants by their
  * respective swizzles. Simplify instructions like ADD dst, src, 0;
@@ -454,6 +470,204 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction *
                constant_folding_add(inst);
 }
 
+/**
+ * Return the components that src reads, limited to RC_MASK_XYZW, when src
+ * and dst name the same register; 0 otherwise.  dst.WriteMask is not used.
+ */
+static unsigned int src_reads_dst_mask(struct rc_src_register src,
+                                               struct rc_dst_register dst)
+{
+       unsigned int chan;
+       unsigned int read_mask = 0;
+       if (src.File != dst.File || src.Index != dst.Index) {
+               return 0;
+       }
+
+       for(chan = 0; chan < 4; chan++) {
+               read_mask |= 1 << GET_SWZ(src.Swizzle, chan);
+       }
+
+       /* Restrict the collected bits to RC_MASK_XYZW. */
+       return read_mask & RC_MASK_XYZW;
+}
+
+/* Report whether any of the four channels of src selects one of the
+ * constant swizzles (0, 0.5 or 1.0): returns 1 if so, 0 if not. */
+static int src_has_const_swz(struct rc_src_register src) {
+       int i;
+       for(i = 0; i < 4; i++) {
+               switch (GET_SWZ(src.Swizzle, i)) {
+               case RC_SWIZZLE_ZERO: case RC_SWIZZLE_HALF: case RC_SWIZZLE_ONE:
+                       return 1;
+               default: break;
+               }
+       }
+       return 0;
+}
+
+static void peephole_scan_write(void * data, struct rc_instruction * inst,
+               rc_register_file file, unsigned int index, unsigned int mask)
+{
+       /* Drop components of the tracked register that this write clobbers. */
+       struct peephole_state * state = data;
+       if (state->Inst->U.I.DstReg.File != file
+           || state->Inst->U.I.DstReg.Index != index)
+               return;
+       state->WriteMask &= ~mask;
+}
+
+/**
+ * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
+ * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
+ * of the add instruction must have the constant 1 swizzle.  This function
+ * does not check const registers to see if their value is 1.0, so it should
+ * be called after the constant_folding optimization.
+ * @return
+ *     0 if the ADD instruction is still part of the program.
+ *     1 if the ADD instruction is no longer part of the program.
+ */
+static int peephole_add_presub_inv(
+       struct radeon_compiler * c,
+       struct rc_instruction * inst_add)
+{
+       unsigned int i, swz, mask;
+       unsigned int can_remove = 0; /* set once a reader was rewritten to use the presubtract */
+       unsigned int cant_sub = 0; /* set once the presubtract source is overwritten or flow control is seen */
+       struct rc_instruction * inst;
+       struct peephole_state s;
+
+       if (inst_add->U.I.SaturateMode)
+               return 0;
+
+       mask = inst_add->U.I.DstReg.WriteMask; /* NOTE(review): mask is not read again in this function */
+
+       /* Check if src0 is 1 in every channel that the ADD writes. */
+       /* XXX It would be nice to use is_src_uniform_constant here, but that
+        * function only works if the register's file is RC_FILE_NONE */
+       for(i = 0; i < 4; i++ ) {
+               swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
+               if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
+                                               && swz != RC_SWIZZLE_ONE) {
+                       return 0;
+               }
+       }
+
+       /* Check src1: negated in all written channels, no Abs, temporary or constant file, no constant swizzle. */
+       if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
+                                               inst_add->U.I.DstReg.WriteMask
+               || inst_add->U.I.SrcReg[1].Abs
+               || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
+                       && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
+               || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
+
+               return 0;
+       }
+
+       /* Setup the peephole_state information. */
+       s.Inst = inst_add;
+       s.WriteMask = inst_add->U.I.DstReg.WriteMask;
+
+       /* For all instructions that read inst_add->U.I.DstReg before it is
+        * written again, use the 1 - src0 presubtract instead. */
+       for(inst = inst_add->Next; inst != &c->Program.Instructions;
+                                                       inst = inst->Next) {
+               const struct rc_opcode_info * info =
+                                       rc_get_opcode_info(inst->U.I.Opcode);
+
+               for(i = 0; i < info->NumSrcRegs; i++) {
+                       if(inst_add->U.I.DstReg.WriteMask !=
+                                       src_reads_dst_mask(inst->U.I.SrcReg[i],
+                                               inst_add->U.I.DstReg)) {
+                               continue; /* NOTE(review): sources reading a different component set are skipped, not rejected - verify */
+                       }
+                       if (cant_sub) {
+                               can_remove = 0;
+                               break;
+                       }
+                       /* XXX: There are some situations where instructions
+                        * with more than 2 src registers can use the
+                        * presubtract select, but to keep things simple we
+                        * will disable presubtract on these instructions for
+                        * now. Note: This if statement should not be pulled
+                        * outside of the loop, because it only applies to
+                        * instructions that could potentially use the
+                        * presubtract source. */
+                       if (info->NumSrcRegs > 2) {
+                               can_remove = 0;
+                               break;
+                       }
+
+                       /* We can't use more than one presubtract value in an
+                        * instruction, unless the two presubtract operations
+                        * are the same and read from the same registers. */
+                       if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
+                               if (inst->U.I.PreSub.Opcode != RC_PRESUB_INV
+                                       || inst->U.I.PreSub.SrcReg[0].File !=
+                                               inst_add->U.I.SrcReg[1].File
+                                       || inst->U.I.PreSub.SrcReg[0].Index !=
+                                               inst_add->U.I.SrcReg[1].Index) {
+
+                                       can_remove = 0;
+                                       break;
+                               }
+                       }
+                       /* We must be careful not to modify inst_add, since it
+                        * is possible it will remain part of the program. */
+                       inst->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
+                       inst->U.I.PreSub.SrcReg[0].Negate = 0;
+                       inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
+                       inst->U.I.SrcReg[i] = chain_srcregs(inst->U.I.SrcReg[i],
+                                               inst->U.I.PreSub.SrcReg[0]);
+
+                       inst->U.I.SrcReg[i].File = RC_FILE_PRESUB;
+                       inst->U.I.SrcReg[i].Index = RC_PRESUB_INV;
+                       can_remove = 1;
+               }
+               if(!can_remove)
+                       break;
+               rc_for_all_writes_mask(inst, peephole_scan_write, &s);
+               /* If all components of inst_add's destination register have
+                * been written to by subsequent instructions, the original
+                * value of the destination register is no longer valid and
+                * we can't keep doing substitutions. */
+               if (!s.WriteMask){
+                       break;
+               }
+               /* Make sure this instruction doesn't write to the presubtract source. */
+               if (inst->U.I.DstReg.WriteMask &
+                               src_reads_dst_mask(inst_add->U.I.SrcReg[1],
+                                                       inst->U.I.DstReg)
+                               || info->IsFlowControl) {
+                       cant_sub = 1;
+               }
+       }
+       if(can_remove) {
+               rc_remove_instruction(inst_add);
+               return 1;
+       }
+       return 0;
+}
+
+/**
+ * Run the opcode-specific peephole optimizations on inst.  Only ADD is
+ * handled, and only when c->has_presub is set.
+ * @return 1 if inst is no longer part of the program, 0 if it still is
+ * part of it.
+ */
+static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
+{
+       int removed = 0;
+
+       /* Evaluation order matters: the presubtract pass may only be tried
+        * on ADD instructions and only when presubtract is available. */
+       if (inst->U.I.Opcode == RC_OPCODE_ADD && c->has_presub
+                               && peephole_add_presub_inv(c, inst)) {
+               removed = 1;
+       }
+
+       return removed;
+}
+
 void rc_optimize(struct radeon_compiler * c, void *user)
 {
        struct rc_instruction * inst = c->Program.Instructions.Next;
@@ -463,8 +677,11 @@ void rc_optimize(struct radeon_compiler * c, void *user)
 
                constant_folding(c, cur);
 
+               if(peephole(c, cur))
+                       continue;
+
                if (cur->U.I.Opcode == RC_OPCODE_MOV) {
-                       peephole(c, cur);
+                       copy_propagate(c, cur);
                        /* cur may no longer be part of the program */
                }
        }
index 8e232bb2436e976528d393769d8d92740b07d419..32c54fd74bc469021756787527b3bb95a57b41f2 100644 (file)
@@ -279,11 +279,118 @@ static int destructive_merge_instructions(
                struct rc_pair_instruction * rgb,
                struct rc_pair_instruction * alpha)
 {
+       const struct rc_opcode_info * opcode;
        assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
        assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
 
+       /* Presubtract registers need to be merged first so that registers
+        * needed by the presubtract operation can be placed in src0 and/or
+        * src1. */
+
+       /* Merge the rgb presubtract registers. */
+       const struct rc_opcode_info * rgb_info =
+                                       rc_get_opcode_info(rgb->RGB.Opcode);
+       if (alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+               unsigned int srcp_src;
+               unsigned int srcp_regs;
+               if (rgb->RGB.Src[RC_PAIR_PRESUB_SRC].Used)
+                       return 0;
+               srcp_regs = rc_presubtract_src_reg_count(
+                               alpha->RGB.Src[RC_PAIR_PRESUB_SRC].Index);
+               for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
+                       unsigned int arg;
+                       int free_source;
+                       struct radeon_pair_instruction_source srcp =
+                                               alpha->RGB.Src[srcp_src];
+                       struct radeon_pair_instruction_source temp;
+                       /* 2nd arg of 1 means this is an rgb source.
+                        * 3rd arg of 0 means this is not an alpha source. */
+                       free_source = rc_pair_alloc_source(rgb, 1, 0,
+                                                       srcp.File, srcp.Index);
+                       /* If free_source == srcp_src, then the presubtract
+                        * source is already in the correct place. */
+                       if (free_source == srcp_src)
+                               continue;
+                       /* If free_source < 0 then there are no free source
+                        * slots. */
+                       if (free_source < 0)
+                               return 0;
+                       /* Shuffle the sources, so we can put the
+                        * presubtract source in the correct place. */
+                       for (arg = 0; arg < rgb_info->NumSrcRegs; arg++) {
+                               /*If this arg does not read from an rgb source,
+                                * do nothing. */
+                               if (rc_source_type_that_arg_reads(
+                                       rgb->RGB.Arg[arg].Source,
+                                       rgb->RGB.Arg[arg].Swizzle, 3)
+                                                       != RC_PAIR_SOURCE_RGB) {
+                                       continue;
+                               }
+                               if (rgb->RGB.Arg[arg].Source == srcp_src)
+                                       rgb->RGB.Arg[arg].Source = free_source;
+                               /* We need to do this just in case register
+                                * is one of the sources already, but in the
+                                * wrong spot. */
+                               else if(rgb->RGB.Arg[arg].Source == free_source)
+                                       rgb->RGB.Arg[arg].Source = srcp_src;
+                       }
+                       temp = rgb->RGB.Src[srcp_src];
+                       rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source];
+                       rgb->RGB.Src[free_source] = temp;
+               }
+       }
+
+       /* Merge the alpha presubtract registers */
+       if (alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
+               unsigned int srcp_src;
+               unsigned int srcp_regs;
+               if(rgb->Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
+                       return 0;
+
+               srcp_regs = rc_presubtract_src_reg_count(
+                       alpha->Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
+               for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
+                       unsigned int arg;
+                       int free_source;
+                       struct radeon_pair_instruction_source srcp =
+                                               alpha->Alpha.Src[srcp_src];
+                       struct radeon_pair_instruction_source temp;
+                       /* 2nd arg of 0 means this is not an rgb source.
+                        * 3rd arg of 1 means this is an alpha source. */
+                       free_source = rc_pair_alloc_source(rgb, 0, 1,
+                                                       srcp.File, srcp.Index);
+                       /* If free_source == srcp_src, then the presubtract
+                        * source is already in the correct place. */
+                       if (free_source == srcp_src)
+                               continue;
+                       /* If free_source < 0 then there are no free source
+                        * slots. */
+                       if (free_source < 0)
+                               return 0;
+                       /* Shuffle the sources, so we can put the
+                        * presubtract source in the correct place. */
+                       for(arg = 0; arg < rgb_info->NumSrcRegs; arg++) {
+                               /*If this arg does not read from an alpha
+                                * source, do nothing. */
+                               if (rc_source_type_that_arg_reads(
+                                       rgb->RGB.Arg[arg].Source,
+                                       rgb->RGB.Arg[arg].Swizzle, 3)
+                                               != RC_PAIR_SOURCE_ALPHA) {
+                                       continue;
+                               }
+                               if (rgb->RGB.Arg[arg].Source == srcp_src)
+                                       rgb->RGB.Arg[arg].Source = free_source;
+                               else if (rgb->RGB.Arg[arg].Source == free_source)
+                                       rgb->RGB.Arg[arg].Source = srcp_src;
+                       }
+                       temp = rgb->Alpha.Src[srcp_src];
+                       rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source];
+                       rgb->Alpha.Src[free_source] = temp;
+               }
+       }
+
        /* Copy alpha args into rgb */
-       const struct rc_opcode_info * opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
+       opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
 
        for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
                unsigned int srcrgb = 0;
@@ -351,7 +458,52 @@ static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_i
        return 0;
 }
 
+/* If emitted uses a presubtract value and one of the presubtract source
+ * temporaries has the index of a register written by the previous pair
+ * instruction, mark that previous instruction as needing a nop (U.P.Nop = 1).
+ * -1 in prev_*_index means that half of the previous instruction wrote nothing. */
+static void presub_nop(struct rc_instruction * emitted) {
+       int prev_rgb_index, prev_alpha_index, i, num_src;
 
+       /* We don't need a nop if the previous instruction is a TEX. */
+       if (emitted->Prev->Type != RC_INSTRUCTION_PAIR) {
+               return;
+       }
+       prev_rgb_index = emitted->Prev->U.P.RGB.WriteMask ?
+                       (int)emitted->Prev->U.P.RGB.DestIndex : -1;
+       prev_alpha_index = emitted->Prev->U.P.Alpha.WriteMask ?
+                       (int)emitted->Prev->U.P.Alpha.DestIndex : -1;
+
+       /* Compare the rgb presubtract sources with the previous writes. */
+       if (emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+               num_src = rc_presubtract_src_reg_count(
+                               emitted->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index);
+               for (i = 0; i < num_src; i++) {
+                       unsigned int index = emitted->U.P.RGB.Src[i].Index;
+                       if (emitted->U.P.RGB.Src[i].File == RC_FILE_TEMPORARY
+                           && (index == prev_rgb_index
+                               || index == prev_alpha_index)) {
+                               emitted->Prev->U.P.Nop = 1;
+                               return;
+                       }
+               }
+       }
+
+       /* Compare the alpha presubtract sources with the previous writes. */
+       if (!emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
+               return;
+
+       num_src = rc_presubtract_src_reg_count(
+                               emitted->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Index);
+       for (i = 0; i < num_src; i++) {
+               unsigned int index = emitted->U.P.Alpha.Src[i].Index;
+               if(emitted->U.P.Alpha.Src[i].File == RC_FILE_TEMPORARY
+                  && (index == prev_rgb_index || index == prev_alpha_index)) {
+                       emitted->Prev->U.P.Nop = 1;
+                       return;
+               }
+       }
+}
 /**
  * Find a good ALU instruction or pair of ALU instruction and emit it.
  *
@@ -408,6 +560,10 @@ static void emit_one_alu(struct schedule_state *s, struct rc_instruction * befor
                commit_alu_instruction(s, sinst);
        success: ;
        }
+       /* If the instruction we just emitted uses a presubtract value, and
+        * the presubtract sources were written by the previous instruction,
+        * the previous instruction needs a nop. */
+       presub_nop(before->Prev);
 }
 
 static void scan_read(void * data, struct rc_instruction * inst,
index 9fe39344f8e127eb96c6159983c9fbae0d40ef0a..4cdb7ea748ee3b71fdccc441d701d9bc327e00e4 100644 (file)
@@ -127,6 +127,18 @@ static void classify_instruction(struct rc_sub_instruction * inst,
        }
 }
 
+/* Set *rgb if src swizzles a component below 3, *alpha if it swizzles 3. */
+static void src_uses(struct rc_src_register src, unsigned int * rgb,
+                                                       unsigned int * alpha)
+{
+       for(int chan = 0; chan < 4; ++chan) {
+               unsigned int sel = GET_SWZ(src.Swizzle, chan);
+               if (sel == 3)
+                       *alpha = 1;
+               else if (sel < 3)
+                       *rgb = 1;
+       }
+}
 
 /**
  * Fill the given ALU instruction's opcodes and source operands into the given pair,
@@ -158,12 +170,51 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
        const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
        int i;
 
+       /* Presubtract handling:
+        * We need to make sure that the values used by the presubtract
+        * operation end up in src0 or src1. */
+       if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
+               /* rc_pair_alloc_source() will fill in data for
+                * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
+               int j;
+               for(j = 0; j < 3; j++) {
+                       int src_regs;
+                       if(inst->SrcReg[j].File != RC_FILE_PRESUB)
+                               continue;
+
+                       src_regs = rc_presubtract_src_reg_count(
+                                                       inst->PreSub.Opcode);
+                       for(i = 0; i < src_regs; i++) {
+                               unsigned int rgb = 0;
+                               unsigned int alpha = 0;
+                               src_uses(inst->SrcReg[j], &rgb, &alpha);
+                               if(rgb) {
+                                       pair->RGB.Src[i].File =
+                                               inst->PreSub.SrcReg[i].File;
+                                       pair->RGB.Src[i].Index =
+                                               inst->PreSub.SrcReg[i].Index;
+                                       pair->RGB.Src[i].Used = 1;
+                               }
+                               if(alpha) {
+                                       pair->Alpha.Src[i].File =
+                                               inst->PreSub.SrcReg[i].File;
+                                       pair->Alpha.Src[i].Index =
+                                               inst->PreSub.SrcReg[i].Index;
+                                       pair->Alpha.Src[i].Used = 1;
+                               }
+                       }
+               }
+       }
+
        for(i = 0; i < opcode->NumSrcRegs; ++i) {
                int source;
                if (needrgb && !istranscendent) {
                        unsigned int srcrgb = 0;
                        unsigned int srcalpha = 0;
                        int j;
+                       /* We don't care about the alpha channel here.  We only
+                        * want the part of the swizzle that writes to rgb,
+                        * since we are creating an rgb instruction. */
                        for(j = 0; j < 3; ++j) {
                                unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
                                if (swz < 3)
@@ -173,6 +224,7 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
                        }
                        source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
                                                        inst->SrcReg[i].File, inst->SrcReg[i].Index);
+                       assert(source != -1);
                        pair->RGB.Arg[i].Source = source;
                        pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff;
                        pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
@@ -188,6 +240,7 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
                                srcalpha = 1;
                        source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
                                                        inst->SrcReg[i].File, inst->SrcReg[i].Index);
+                       assert(source != -1);
                        pair->Alpha.Arg[i].Source = source;
                        pair->Alpha.Arg[i].Swizzle = swz;
                        pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
index 5582f56d921b30e21a19a588e05f6230bcc8beee..f0a77d7b53967ecb2962c7c19fd571e699dbbb80 100644 (file)
@@ -39,7 +39,7 @@
 struct radeon_compiler;
 
 struct rc_src_register {
-       unsigned int File:3;
+       unsigned int File:4;
 
        /** Negative values may be used for relative addressing. */
        signed int Index:(RC_REGISTER_INDEX_BITS+1);
@@ -64,6 +64,11 @@ struct rc_dst_register {
        unsigned int WriteMask:4;
 };
 
+struct rc_presub_instruction { /* presubtract operation attached to an instruction */
+       rc_presubtract_op Opcode; /* RC_PRESUB_NONE when no presubtract is used */
+       struct rc_src_register SrcReg[2]; /* operands of the presubtract operation */
+};
+
 /**
  * Instructions are maintained by the compiler in a doubly linked list
  * of these structures.
@@ -108,6 +113,10 @@ struct rc_sub_instruction {
        /** True if tex instruction should do shadow comparison */
        unsigned int TexShadow:1;
        /*@}*/
+
+       /** This holds information about the presubtract operation used by
+        * this instruction. */
+       struct rc_presub_instruction PreSub;
 };
 
 typedef enum {
index 2ddf60b6774e53cb2629fe28515eeed49d7167f3..9dcd44c522dbc24a72ded4df6bddd5f4caf917de 100644 (file)
@@ -79,7 +79,13 @@ typedef enum {
        /**
         * Indicates a special register, see RC_SPECIAL_xxx.
         */
-       RC_FILE_SPECIAL
+       RC_FILE_SPECIAL,
+
+       /**
+        * Indicates this register should use the result of the presubtract
+        * operation.
+        */
+       RC_FILE_PRESUB
 } rc_register_file;
 
 enum {
@@ -147,4 +153,32 @@ typedef enum {
        RC_ALURESULT_W
 } rc_write_aluresult;
 
+typedef enum { /* presubtract operations; src0/src1 are the presubtract operands */
+       RC_PRESUB_NONE = 0, /* no presubtract operation */
+
+       /** 1 - 2 * src0 */
+       RC_PRESUB_BIAS,
+
+       /** src1 - src0 */
+       RC_PRESUB_SUB,
+
+       /** src1 + src0 */
+       RC_PRESUB_ADD,
+
+       /** 1 - src0 */
+       RC_PRESUB_INV
+} rc_presubtract_op;
+
+/* Number of source registers read by presubtract operation op. */
+static inline int rc_presubtract_src_reg_count(rc_presubtract_op op){
+       if (op == RC_PRESUB_BIAS || op == RC_PRESUB_INV) {
+               /* Single operand: 1 - 2 * src0 and 1 - src0. */
+               return 1;
+       }
+       if (op == RC_PRESUB_ADD || op == RC_PRESUB_SUB) {
+               /* Two operands: src1 + src0 and src1 - src0. */
+               return 2;
+       }
+       return 0;
+}
 #endif /* RADEON_PROGRAM_CONSTANTS_H */
index ee839596aab0f8b440c1279538f8cd6a2a68a4f5..5a50584b7250a37dcc0f0c2081c18a44e586f43e 100644 (file)
@@ -38,26 +38,52 @@ int rc_pair_alloc_source(struct rc_pair_instruction *pair,
 {
        int candidate = -1;
        int candidate_quality = -1;
+       unsigned int alpha_used = 0;
+       unsigned int rgb_used = 0;
        int i;
 
        if ((!rgb && !alpha) || file == RC_FILE_NONE)
                return 0;
 
+       if (rgb && pair->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+               if (file == RC_FILE_PRESUB) {
+                       if (index != pair->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
+                               return -1;
+                       }
+               } else {
+                       rgb_used++;
+               }
+       }
+
+       if (alpha && pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
+               if (file == RC_FILE_PRESUB) {
+                       if (index != pair->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
+                               return -1;
+                       }
+               } else {
+                       alpha_used++;
+               }
+       }
+
        for(i = 0; i < 3; ++i) {
                int q = 0;
                if (rgb) {
                        if (pair->RGB.Src[i].Used) {
                                if (pair->RGB.Src[i].File != file ||
-                                   pair->RGB.Src[i].Index != index)
+                                   pair->RGB.Src[i].Index != index) {
+                                       rgb_used++;
                                        continue;
+                               }
                                q++;
                        }
                }
                if (alpha) {
                        if (pair->Alpha.Src[i].Used) {
                                if (pair->Alpha.Src[i].File != file ||
-                                   pair->Alpha.Src[i].Index != index)
+                                   pair->Alpha.Src[i].Index != index) {
+                                       alpha_used++;
                                        continue;
+                               }
                                q++;
                        }
                }
@@ -66,19 +92,156 @@ int rc_pair_alloc_source(struct rc_pair_instruction *pair,
                        candidate = i;
                }
        }
+       if (candidate < 0 || (rgb && rgb_used > 2) || (alpha && alpha_used > 2))
+               return -1;
 
-       if (candidate >= 0) {
-               if (rgb) {
-                       pair->RGB.Src[candidate].Used = 1;
-                       pair->RGB.Src[candidate].File = file;
-                       pair->RGB.Src[candidate].Index = index;
+       /* candidate >= 0 */
+
+       /* Even if we have a presub src, the above loop needs to run,
+        * because we still need to make sure there is a free source.
+        */
+       if (file == RC_FILE_PRESUB)
+               candidate = RC_PAIR_PRESUB_SRC;
+
+       if (rgb) {
+               pair->RGB.Src[candidate].Used = 1;
+               pair->RGB.Src[candidate].File = file;
+               pair->RGB.Src[candidate].Index = index;
+               if (candidate == RC_PAIR_PRESUB_SRC) {
+                       /* For registers with the RC_FILE_PRESUB file,
+                        * the index stores the presubtract op. */
+                       int src_regs = rc_presubtract_src_reg_count(index);
+                       for(i = 0; i < src_regs; i++) {
+                               pair->RGB.Src[i].Used = 1;
+                       }
                }
-               if (alpha) {
-                       pair->Alpha.Src[candidate].Used = 1;
-                       pair->Alpha.Src[candidate].File = file;
-                       pair->Alpha.Src[candidate].Index = index;
+       }
+       if (alpha) {
+               pair->Alpha.Src[candidate].Used = 1;
+               pair->Alpha.Src[candidate].File = file;
+               pair->Alpha.Src[candidate].Index = index;
+               if (candidate == RC_PAIR_PRESUB_SRC) {
+                       /* For registers with the RC_FILE_PRESUB file,
+                        * the index stores the presubtract op. */
+                       int src_regs = rc_presubtract_src_reg_count(index);
+                       for(i=0; i < src_regs; i++) {
+                               pair->Alpha.Src[i].Used = 1;
+                       }
                }
        }
 
        return candidate;
 }
+
+/* Call cb on every source that is read through source slot src with
+ * swizzle swz.  W selects the alpha sources, X/Y/Z the rgb sources; the
+ * presubtract slot stands for all sources the presubtract operation reads. */
+static void pair_foreach_source_callback(
+       struct rc_pair_instruction * pair,
+       void * data,
+       rc_pair_foreach_src_fn cb,
+       unsigned int swz,
+       unsigned int src)
+{
+       struct radeon_pair_instruction_source * bank;
+       unsigned int first = src;
+       unsigned int count = 1;
+       unsigned int n;
+
+       /* swz > 3 means that the swizzle is either not used, or a constant
+        * swizzle (e.g. 0, 1, 0.5). */
+       if(swz > 3)
+               return;
+
+       if (swz == RC_SWIZZLE_W) {
+               bank = pair->Alpha.Src;
+       } else {
+               bank = pair->RGB.Src;
+       }
+
+       /* Reading the presubtract result reads the leading source slots
+        * that feed the presubtract operation stored in the slot's Index. */
+       if (src == RC_PAIR_PRESUB_SRC) {
+               first = 0;
+               count = rc_presubtract_src_reg_count(
+                                       bank[RC_PAIR_PRESUB_SRC].Index);
+       }
+       for(n = 0; n < count; n++) {
+               cb(data, &bank[first + n]);
+       }
+}
+
+/* Call cb on every source read by the alpha arguments of pair. */
+void rc_pair_foreach_source_that_alpha_reads(
+       struct rc_pair_instruction * pair,
+       void * data,
+       rc_pair_foreach_src_fn cb)
+{
+       const struct rc_opcode_info * opcode =
+                               rc_get_opcode_info(pair->Alpha.Opcode);
+       for(unsigned int arg = 0; arg < opcode->NumSrcRegs; arg++) {
+               unsigned int swz = GET_SWZ(pair->Alpha.Arg[arg].Swizzle, 0);
+               pair_foreach_source_callback(pair, data, cb, swz,
+                                       pair->Alpha.Arg[arg].Source);
+       }
+}
+
+/* Call cb on every source read by the rgb arguments of pair; the first
+ * X/Y/Z/W swizzle of an argument selects the rgb or alpha source bank. */
+void rc_pair_foreach_source_that_rgb_reads(
+       struct rc_pair_instruction * pair,
+       void * data,
+       rc_pair_foreach_src_fn cb)
+{
+       unsigned int i;
+       const struct rc_opcode_info * info =
+                               rc_get_opcode_info(pair->RGB.Opcode);
+       for(i = 0; i < info->NumSrcRegs; i++) {
+               unsigned int swz = RC_SWIZZLE_UNUSED;
+               /* Stop at the first channel that swizzles X,Y,Z,or W.  We
+                * assume here that if one channel swizzles X,Y, or Z, then
+                * none of the other channels swizzle W, and vice-versa. */
+               for(unsigned int chan = 0; chan < 4; chan++) {
+                       swz = GET_SWZ(pair->RGB.Arg[i].Swizzle, chan);
+                       if(swz == RC_SWIZZLE_X || swz == RC_SWIZZLE_Y
+                       || swz == RC_SWIZZLE_Z || swz == RC_SWIZZLE_W)
+                               break;
+               }
+               pair_foreach_source_callback(pair, data, cb, swz,
+                                       pair->RGB.Arg[i].Source);
+       }
+}
+
+/* Classify what an argument reads: RC_PAIR_SOURCE_RGB if any of the first
+ * `channels` swizzle channels selects X, Y or Z, RC_PAIR_SOURCE_ALPHA if any
+ * selects W, RC_PAIR_SOURCE_NONE otherwise.  `source` is not examined. */
+rc_pair_source_type rc_source_type_that_arg_reads(
+       unsigned int source,
+       unsigned int swizzle,
+       unsigned int channels)
+{
+       int reads_rgb = 0, reads_alpha = 0;
+       unsigned int chan;
+       /* We assume here that if one channel swizzles X,Y, or Z, then none
+        * of the other channels swizzle W, and vice-versa. */
+       for(chan = 0; chan < channels; chan++) {
+               switch (GET_SWZ(swizzle, chan)) {
+               case RC_SWIZZLE_X:
+               case RC_SWIZZLE_Y:
+               case RC_SWIZZLE_Z:
+                       reads_rgb = 1;
+                       break;
+               case RC_SWIZZLE_W:
+                       reads_alpha = 1;
+                       break;
+               default: break;
+               }
+       }
+       assert(!(reads_rgb && reads_alpha));
+
+       if (reads_rgb)
+               return RC_PAIR_SOURCE_RGB;
+       if (reads_alpha)
+               return RC_PAIR_SOURCE_ALPHA;
+       return RC_PAIR_SOURCE_NONE;
+}
index ef5a034700953742e09334edebe2d05d26607eb7..e0061e454bfde420878f11379cb2e4a341d3dd55 100644 (file)
@@ -49,6 +49,11 @@ struct radeon_compiler;
  * see \ref rc_pair_translate
  */
 
+/* For rgb and alpha instructions, when Arg[n].Source == RC_PAIR_PRESUB_SRC
+ * the presubtract value will be used, and
+ * {RGB,Alpha}.Src[RC_PAIR_PRESUB_SRC].File will be set to RC_FILE_PRESUB.
+ */
+#define RC_PAIR_PRESUB_SRC 3
 
 struct radeon_pair_instruction_source {
        unsigned int Used:1;
@@ -64,7 +69,7 @@ struct radeon_pair_instruction_rgb {
        unsigned int OutputWriteMask:3;
        unsigned int Saturate:1;
 
-       struct radeon_pair_instruction_source Src[3];
+       struct radeon_pair_instruction_source Src[4];
 
        struct {
                unsigned int Source:2;
@@ -83,7 +88,7 @@ struct radeon_pair_instruction_alpha {
        unsigned int DepthWriteMask:1;
        unsigned int Saturate:1;
 
-       struct radeon_pair_instruction_source Src[3];
+       struct radeon_pair_instruction_source Src[4];
 
        struct {
                unsigned int Source:2;
@@ -99,8 +104,17 @@ struct rc_pair_instruction {
 
        unsigned int WriteALUResult:2;
        unsigned int ALUResultCompare:3;
+       unsigned int Nop:1;
 };
 
+/* Callback type accepted by rc_pair_foreach_source_that_alpha_reads() and
+ * rc_pair_foreach_source_that_rgb_reads().  It receives an opaque pointer
+ * and a pair-instruction source slot.
+ * NOTE(review): the opaque pointer is presumably the caller's `data`
+ * argument -- confirm against the implementation. */
+typedef void (*rc_pair_foreach_src_fn)
+                       (void *, struct radeon_pair_instruction_source *);
+
+/* Return type of rc_source_type_that_arg_reads(): which part of a source
+ * an argument's swizzle selects. */
+typedef enum {
+       RC_PAIR_SOURCE_NONE = 0,        /* no channel selects X, Y, Z or W */
+       RC_PAIR_SOURCE_RGB,             /* a channel selects X, Y or Z */
+       RC_PAIR_SOURCE_ALPHA            /* a channel selects W */
+} rc_pair_source_type;
 
 /**
  * General helper functions for dealing with the paired instruction format.
@@ -109,6 +123,21 @@ struct rc_pair_instruction {
 int rc_pair_alloc_source(struct rc_pair_instruction *pair,
        unsigned int rgb, unsigned int alpha,
        rc_register_file file, unsigned int index);
+
+/* Call cb for sources of `pair`, passing `data` along.
+ * NOTE(review): presumably visits the sources referenced by the Alpha
+ * arguments -- confirm against the implementation. */
+void rc_pair_foreach_source_that_alpha_reads(
+       struct rc_pair_instruction * pair,
+       void * data,
+       rc_pair_foreach_src_fn cb);
+
+/* Call cb for sources of `pair`, passing `data` along.
+ * NOTE(review): presumably visits the sources referenced by the RGB
+ * arguments -- confirm against the implementation. */
+void rc_pair_foreach_source_that_rgb_reads(
+       struct rc_pair_instruction * pair,
+       void * data,
+       rc_pair_foreach_src_fn cb);
+
+/* Classify the first `channels` channels of `swizzle`: returns
+ * RC_PAIR_SOURCE_RGB if any selects X, Y or Z, RC_PAIR_SOURCE_ALPHA if any
+ * selects W, and RC_PAIR_SOURCE_NONE otherwise.  `source` is not used by
+ * the implementation. */
+rc_pair_source_type rc_source_type_that_arg_reads(
+       unsigned int source,
+       unsigned int swizzle,
+       unsigned int channels);
 /*@}*/
 
 
index a356e94e0328d6272bbe3f16199227baae05f616..01612195810c08951776b5fbcf74bb35fcaa6b3b 100644 (file)
@@ -38,6 +38,24 @@ static const char * textarget_to_string(rc_texture_target target)
        }
 }
 
+/* Map a presubtract opcode to a printable description of the expression it
+ * computes.  Opcodes not listed here yield "BAD_PRESUBTRACT_OP". */
+static const char * presubtract_op_to_string(rc_presubtract_op op)
+{
+       const char * text = "BAD_PRESUBTRACT_OP";
+
+       switch(op) {
+       case RC_PRESUB_NONE:
+               text = "NONE";
+               break;
+       case RC_PRESUB_BIAS:
+               text = "(1 - 2 * src0)";
+               break;
+       case RC_PRESUB_SUB:
+               text = "(src1 - src0)";
+               break;
+       case RC_PRESUB_ADD:
+               text = "(src1 + src0)";
+               break;
+       case RC_PRESUB_INV:
+               text = "(1 - src0)";
+               break;
+       default:
+               break;
+       }
+       return text;
+}
+
 static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs)
 {
        if (func == RC_COMPARE_FUNC_NEVER) {
@@ -125,7 +143,43 @@ static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate
        }
 }
 
-static void rc_print_src_register(FILE * f, struct rc_src_register src)
+/* Print a presubtract operation as a parenthesized expression over its
+ * source registers.
+ *
+ * Operand order follows the formulas given by presubtract_op_to_string():
+ * BIAS is (1 - 2 * src0), SUB is (src1 - src0), ADD is (src1 + src0) and
+ * INV is (1 - src0), where srcN is inst.SrcReg[N].  Any other opcode prints
+ * just "()".
+ */
+static void rc_print_presub_instruction(FILE * f,
+                                       struct rc_presub_instruction inst)
+{
+       fprintf(f,"(");
+       switch(inst.Opcode){
+       case RC_PRESUB_BIAS:
+               fprintf(f, "1 - 2 * ");
+               rc_print_register(f, inst.SrcReg[0].File,
+                               inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+               break;
+       case RC_PRESUB_SUB:
+               /* src1 - src0: SrcReg[1] is the minuend. */
+               rc_print_register(f, inst.SrcReg[1].File,
+                               inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr);
+               fprintf(f, " - ");
+               rc_print_register(f, inst.SrcReg[0].File,
+                               inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+               break;
+       case RC_PRESUB_ADD:
+               /* src1 + src0: same operand order as SUB for consistency. */
+               rc_print_register(f, inst.SrcReg[1].File,
+                               inst.SrcReg[1].Index,inst.SrcReg[1].RelAddr);
+               fprintf(f, " + ");
+               rc_print_register(f, inst.SrcReg[0].File,
+                               inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+               break;
+       case RC_PRESUB_INV:
+               fprintf(f, "1 - ");
+               rc_print_register(f, inst.SrcReg[0].File,
+                               inst.SrcReg[0].Index,inst.SrcReg[0].RelAddr);
+               break;
+       default:
+               break;
+       }
+       fprintf(f, ")");
+}
+
+static void rc_print_src_register(FILE * f, struct rc_instruction * inst,
+                                               struct rc_src_register src)
 {
        int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW);
 
@@ -134,7 +188,10 @@ static void rc_print_src_register(FILE * f, struct rc_src_register src)
        if (src.Abs)
                fprintf(f, "|");
 
-       rc_print_register(f, src.File, src.Index, src.RelAddr);
+       if(src.File == RC_FILE_PRESUB)
+               rc_print_presub_instruction(f, inst->U.I.PreSub);
+       else
+               rc_print_register(f, src.File, src.Index, src.RelAddr);
 
        if (src.Abs && !trivial_negate)
                fprintf(f, "|");
@@ -198,7 +255,7 @@ static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst,
                if (reg > 0)
                        fprintf(f, ",");
                fprintf(f, " ");
-               rc_print_src_register(f, inst->U.I.SrcReg[reg]);
+               rc_print_src_register(f, inst, inst->U.I.SrcReg[reg]);
        }
 
        if (opcode->HasTexture) {
@@ -247,6 +304,16 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst
                        printedsrc = 1;
                }
        }
+       if(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
+               fprintf(f, ", srcp.xyz = %s",
+                       presubtract_op_to_string(
+                                       inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index));
+       }
+       if(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
+               fprintf(f, ", srcp.w = %s",
+                       presubtract_op_to_string(
+                                       inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index));
+       }
        fprintf(f, "\n");
 
        if (inst->RGB.Opcode != RC_OPCODE_NOP) {
@@ -272,7 +339,12 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst
                for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
                        const char* abs = inst->RGB.Arg[arg].Abs ? "|" : "";
                        const char* neg = inst->RGB.Arg[arg].Negate ? "-" : "";
-                       fprintf(f, ", %s%ssrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[arg].Source,
+                       fprintf(f, ", %s%ssrc", neg, abs);
+                       if(inst->RGB.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
+                               fprintf(f,"p");
+                       else
+                               fprintf(f,"%d", inst->RGB.Arg[arg].Source);
+                       fprintf(f,".%c%c%c%s",
                                rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)),
                                rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)),
                                rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)),
@@ -300,7 +372,12 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst
                for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
                        const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : "";
                        const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : "";
-                       fprintf(f, ", %s%ssrc%i.%c%s", neg, abs, inst->Alpha.Arg[arg].Source,
+                       fprintf(f, ", %s%ssrc", neg, abs);
+                       if(inst->Alpha.Arg[arg].Source == RC_PAIR_PRESUB_SRC)
+                               fprintf(f,"p");
+                       else
+                               fprintf(f,"%d", inst->Alpha.Arg[arg].Source);
+                       fprintf(f,".%c%s",
                                rc_swizzle_char(inst->Alpha.Arg[arg].Swizzle), abs);
                }
                fprintf(f, "\n");
index 9281feecfa180ef59026e5574740cdea58683c85..facea382f4ee14eae5a928fcb0d856814a1ad4d2 100644 (file)
 
 #include "radeon_remove_constants.h"
 
+/* Register-remap callback: rewrites the index of a constant-file register
+ * through the table passed as userdata.  Registers in any other file are
+ * left untouched; `inst` is not used. */
+static void remap_regs(void * userdata, struct rc_instruction * inst,
+                       rc_register_file * pfile, unsigned int * pindex)
+{
+        const unsigned *table = userdata;
+
+        if (*pfile != RC_FILE_CONSTANT)
+                return;
+
+        *pindex = table[*pindex];
+}
+
 void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
 {
        unsigned **out_remap_table = (unsigned**)user;
@@ -51,6 +61,10 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
              inst != &c->Program.Instructions; inst = inst->Next) {
                 const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
 
+                /* XXX: This loop and the if statement after it should be
+                 * replaced by a call to one of the rc_for_all_reads_* functions.
+                 * The reason it does not use one of those functions now is
+                 * because none of them have RelAddr as an argument. */
                 for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
                         if (inst->U.I.SrcReg[i].File == RC_FILE_CONSTANT) {
                                 if (inst->U.I.SrcReg[i].RelAddr) {
@@ -60,6 +74,18 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
                                 }
                         }
                 }
+                if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
+                       unsigned int i;
+                       unsigned int srcp_regs = rc_presubtract_src_reg_count(
+                                                       inst->U.I.PreSub.Opcode);
+                       for( i = 0; i < srcp_regs; i++) {
+                                if (inst->U.I.PreSub.SrcReg[i].File ==
+                                                        RC_FILE_CONSTANT) {
+                                        const_used[
+                                            inst->U.I.PreSub.SrcReg[i].Index] = 1;
+                                }
+                        }
+               }
         }
 
         /* Pass 2: If there is relative addressing, mark all externals as used. */
@@ -100,13 +126,7 @@ void rc_remove_unused_constants(struct radeon_compiler *c, void *user)
         if (!is_identity) {
                 for (struct rc_instruction *inst = c->Program.Instructions.Next;
                      inst != &c->Program.Instructions; inst = inst->Next) {
-                        const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
-
-                        for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
-                                if (inst->U.I.SrcReg[i].File == RC_FILE_CONSTANT) {
-                                        inst->U.I.SrcReg[i].Index = inv_remap_table[inst->U.I.SrcReg[i].Index];
-                                }
-                        }
+                        rc_remap_registers(inst, remap_regs, inv_remap_table);
                 }
 
        }