Merge branch 'llvm-cliptest-viewport'
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / radeon_optimize.c
index e01ba85aa56503f8cade8a6a5cf905e984033800..4d9120ffd09f8104120c152b9fb26e0cd423f98c 100644 (file)
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
  *
  * All Rights Reserved.
  *
@@ -28,6 +29,7 @@
 #include "radeon_dataflow.h"
 
 #include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
 #include "radeon_swizzle.h"
 
 struct peephole_state {
@@ -38,6 +40,10 @@ struct peephole_state {
        unsigned int WriteMask;
 };
 
+typedef void (*rc_presub_replace_fn)(struct peephole_state *,
+                                               struct rc_instruction *,
+                                               unsigned int);
+
 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
 {
        struct rc_src_register combine;
@@ -82,84 +88,65 @@ struct copy_propagate_state {
        int BranchDepth;
 };
 
-/**
- * This is a callback function that is meant to be passed to
- * rc_for_all_reads_mask.  This function will be called once for each source
- * register in inst.
- * @param inst The instruction that the source register belongs to.
- * @param file The register file of the source register.
- * @param index The index of the source register.
- * @param mask The components of the source register that are being read from.
- */
 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
-               rc_register_file file, unsigned int index, unsigned int mask)
+                                               struct rc_src_register * src)
 {
-       struct copy_propagate_state * s = data;
+       rc_register_file file = src->File;
+       struct rc_reader_data * reader_data = data;
+       const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
+
+       /* It is possible to do copy propigation in this situation,
+        * just not right now, see peephole_add_presub_inv() */
+       if (reader_data->Writer->U.I.PreSub.Opcode != RC_PRESUB_NONE &&
+                       (info->NumSrcRegs > 2 || info->HasTexture)) {
+               reader_data->Abort = 1;
+               return;
+       }
 
        /* XXX This could probably be handled better. */
        if (file == RC_FILE_ADDRESS) {
-               s->Conflict = 1;
+               reader_data->Abort = 1;
                return;
        }
 
-       if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index)
-               return;
-
        /* These instructions cannot read from the constants file.
         * see radeonTransformTEX()
         */
-       if(s->Mov->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
-                       s->Mov->U.I.SrcReg[0].File != RC_FILE_INPUT &&
+       if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
+                       reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
                                (inst->U.I.Opcode == RC_OPCODE_TEX ||
                                inst->U.I.Opcode == RC_OPCODE_TXB ||
                                inst->U.I.Opcode == RC_OPCODE_TXP ||
                                inst->U.I.Opcode == RC_OPCODE_KIL)){
-               s->Conflict = 1;
+               reader_data->Abort = 1;
                return;
        }
-       if ((mask & s->MovMask) == mask) {
-               if (s->SourceClobbered) {
-                       s->Conflict = 1;
-               }
-       } else if ((mask & s->DefinedMask) == mask) {
-               /* read from something entirely written by other instruction: this is okay */
-       } else {
-               /* read from component combination that is not well-defined without
-                * the MOV: cannot remove it */
-               s->Conflict = 1;
-       }
 }
 
 static void copy_propagate_scan_write(void * data, struct rc_instruction * inst,
                rc_register_file file, unsigned int index, unsigned int mask)
 {
-       struct copy_propagate_state * s = data;
+       struct rc_reader_data * reader_data = data;
+       struct copy_propagate_state * s = reader_data->CbData;
 
-       if (s->BranchDepth < 0)
-               return;
-
-       if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) {
-               s->MovMask &= ~mask;
-               if (s->BranchDepth == 0)
-                       s->DefinedMask |= mask;
-               else
-                       s->DefinedMask &= ~mask;
-       }
-       if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) {
+       if (file == reader_data->Writer->U.I.SrcReg[0].File && index == reader_data->Writer->U.I.SrcReg[0].Index) {
                if (mask & s->SourcedMask)
-                       s->SourceClobbered = 1;
+                       reader_data->AbortOnRead = 1;
        } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) {
-               s->SourceClobbered = 1;
+               reader_data->AbortOnRead = 1;
        }
 }
 
 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
 {
        struct copy_propagate_state s;
+       struct rc_reader_data reader_data;
+       unsigned int i;
 
        if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
            inst_mov->U.I.DstReg.RelAddr ||
-           inst_mov->U.I.WriteALUResult)
+           inst_mov->U.I.WriteALUResult ||
+           inst_mov->U.I.SaturateMode)
                return;
 
        memset(&s, 0, sizeof(s));
@@ -168,95 +155,27 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i
        s.MovMask = inst_mov->U.I.DstReg.WriteMask;
        s.DefinedMask = RC_MASK_XYZW & ~s.MovMask;
 
+       reader_data.CbData = &s;
+
        for(unsigned int chan = 0; chan < 4; ++chan) {
                unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan);
                s.SourcedMask |= (1 << swz) & RC_MASK_XYZW;
        }
 
-       /* 1st pass: Check whether all subsequent readers can be changed */
-       for(struct rc_instruction * inst = inst_mov->Next;
-           inst != &c->Program.Instructions;
-           inst = inst->Next) {
-               const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
-               /* XXX In the future we might be able to make the optimizer
-                * smart enough to handle loops. */
-               if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP
-                               || inst->U.I.Opcode == RC_OPCODE_ENDLOOP){
-                       return;
-               }
-
-               /* It is possible to do copy propigation in this situation,
-                * just not right now, see peephole_add_presub_inv() */
-               if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE &&
-                                               info->NumSrcRegs > 2) {
-                       return;
-               }
-
-               rc_for_all_reads_mask(inst, copy_propagate_scan_read, &s);
-               rc_for_all_writes_mask(inst, copy_propagate_scan_write, &s);
-               if (s.Conflict)
-                       return;
+       /* Get a list of all the readers of this MOV instruction. */
+       rc_get_readers_normal(c, inst_mov, &reader_data,
+                       copy_propagate_scan_read, copy_propagate_scan_write);
 
-               if (s.BranchDepth >= 0) {
-                       if (inst->U.I.Opcode == RC_OPCODE_IF) {
-                               s.BranchDepth++;
-                       } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF
-                               || inst->U.I.Opcode == RC_OPCODE_ELSE) {
-                               s.BranchDepth--;
-                               if (s.BranchDepth < 0) {
-                                       s.DefinedMask &= ~s.MovMask;
-                                       s.MovMask = 0;
-                               }
-                       }
-               }
-       }
-
-       if (s.Conflict)
+       if (reader_data.Abort || reader_data.ReaderCount == 0)
                return;
 
-       /* 2nd pass: We can satisfy all readers, so switch them over all at once */
-       s.MovMask = inst_mov->U.I.DstReg.WriteMask;
-       s.BranchDepth = 0;
-
-       for(struct rc_instruction * inst = inst_mov->Next;
-           inst != &c->Program.Instructions;
-           inst = inst->Next) {
-               const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
-               for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
-                       if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY &&
-                           inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) {
-                               unsigned int refmask = 0;
-
-                               for(unsigned int chan = 0; chan < 4; ++chan) {
-                                       unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
-                                       refmask |= (1 << swz) & RC_MASK_XYZW;
-                               }
-
-                               if ((refmask & s.MovMask) == refmask) {
-                                       inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]);
-                                       if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
-                                               inst->U.I.PreSub = s.Mov->U.I.PreSub;
-                               }
-                       }
-               }
-
-               if (opcode->HasDstReg) {
-                       if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY &&
-                           inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) {
-                               s.MovMask &= ~inst->U.I.DstReg.WriteMask;
-                       }
-               }
+       /* Propagate the MOV instruction. */
+       for (i = 0; i < reader_data.ReaderCount; i++) {
+               struct rc_instruction * inst = reader_data.Readers[i].Inst;
+               *reader_data.Readers[i].Src = chain_srcregs(*reader_data.Readers[i].Src, s.Mov->U.I.SrcReg[0]);
 
-               if (s.BranchDepth >= 0) {
-                       if (inst->U.I.Opcode == RC_OPCODE_IF) {
-                               s.BranchDepth++;
-                       } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF
-                               || inst->U.I.Opcode == RC_OPCODE_ELSE) {
-                               s.BranchDepth--;
-                               if (s.BranchDepth < 0)
-                                       break; /* no more readers after this point */
-                       }
-               }
+               if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
+                       inst->U.I.PreSub = s.Mov->U.I.PreSub;
        }
 
        /* Finally, remove the original MOV instruction */
@@ -403,30 +322,38 @@ static void constant_folding_add(struct rc_instruction * inst)
 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
 {
        const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+       unsigned int i;
 
        /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
        for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+               struct rc_constant * constant;
+               struct rc_src_register newsrc;
+               int have_real_reference;
+
                if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
                    inst->U.I.SrcReg[src].RelAddr ||
                    inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
                        continue;
 
-               struct rc_constant * constant =
+               constant =
                        &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
 
                if (constant->Type != RC_CONSTANT_IMMEDIATE)
                        continue;
 
-               struct rc_src_register newsrc = inst->U.I.SrcReg[src];
-               int have_real_reference = 0;
+               newsrc = inst->U.I.SrcReg[src];
+               have_real_reference = 0;
                for(unsigned int chan = 0; chan < 4; ++chan) {
                        unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
+                       unsigned int newswz;
+                       float imm;
+                       float baseimm;
+
                        if (swz >= 4)
                                continue;
 
-                       unsigned int newswz;
-                       float imm = constant->u.Immediate[swz];
-                       float baseimm = imm;
+                       imm = constant->u.Immediate[swz];
+                       baseimm = imm;
                        if (imm < 0.0)
                                baseimm = -baseimm;
 
@@ -468,27 +395,26 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction *
                constant_folding_mul(inst);
        else if (inst->U.I.Opcode == RC_OPCODE_ADD)
                constant_folding_add(inst);
+
+       /* In case this instruction has been converted, make sure all of the
+        * registers that are no longer used are empty. */
+       opcode = rc_get_opcode_info(inst->U.I.Opcode);
+       for(i = opcode->NumSrcRegs; i < 3; i++) {
+               memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
+       }
 }
 
 /**
- * This function returns a writemask that indicates wich components are
- * read by src and also written by dst.
+ * If src and dst use the same register, this function returns a writemask that
+ * indicates wich components are read by src.  Otherwise zero is returned.
  */
 static unsigned int src_reads_dst_mask(struct rc_src_register src,
                                                struct rc_dst_register dst)
 {
-       unsigned int mask = 0;
-       unsigned int i;
        if (dst.File != src.File || dst.Index != src.Index) {
                return 0;
        }
-
-       for(i = 0; i < 4; i++) {
-               mask |= 1 << GET_SWZ(src.Swizzle, i);
-       }
-       mask &= RC_MASK_XYZW;
-
-       return mask;
+       return rc_swizzle_to_writemask(src.Swizzle);
 }
 
 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
@@ -516,6 +442,194 @@ static void peephole_scan_write(void * data, struct rc_instruction * inst,
        }
 }
 
+static int presub_helper(
+       struct radeon_compiler * c,
+       struct peephole_state * s,
+       rc_presubtract_op presub_opcode,
+       rc_presub_replace_fn presub_replace)
+{
+       struct rc_instruction * inst;
+       unsigned int can_remove = 0;
+       unsigned int cant_sub = 0;
+
+       for(inst = s->Inst->Next; inst != &c->Program.Instructions;
+                                                       inst = inst->Next) {
+               unsigned int i;
+               unsigned char can_use_presub = 1;
+               const struct rc_opcode_info * info =
+                                       rc_get_opcode_info(inst->U.I.Opcode);
+               /* XXX: There are some situations where instructions
+                * with more than 2 src registers can use the
+                * presubtract select, but to keep things simple we
+                * will disable presubtract on these instructions for
+                * now. */
+               if (info->NumSrcRegs > 2 || info->HasTexture) {
+                       can_use_presub = 0;
+               }
+
+               /* We can't use more than one presubtract value in an
+                * instruction, unless the two prsubtract operations
+                * are the same and read from the same registers. */
+               if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
+                       if (inst->U.I.PreSub.Opcode != presub_opcode
+                               || inst->U.I.PreSub.SrcReg[0].File !=
+                                       s->Inst->U.I.SrcReg[1].File
+                               || inst->U.I.PreSub.SrcReg[0].Index !=
+                                       s->Inst->U.I.SrcReg[1].Index) {
+                               can_use_presub = 0;
+                       }
+               }
+
+               /* Even if the instruction can't use a presubtract operation
+                * we still need to check if the instruction reads from
+                * s->Inst->U.I.DstReg, because if it does we must not
+                * remove s->Inst. */
+               for(i = 0; i < info->NumSrcRegs; i++) {
+                       unsigned int mask = src_reads_dst_mask(
+                               inst->U.I.SrcReg[i], s->Inst->U.I.DstReg);
+                       /* XXX We could be more aggressive here using
+                        * presubtract.  It is okay if SrcReg[i] only reads
+                        * from some of the mask components. */
+                       if(s->Inst->U.I.DstReg.WriteMask != mask) {
+                               if (s->Inst->U.I.DstReg.WriteMask & mask) {
+                                       can_remove = 0;
+                                       break;
+                               } else {
+                                       continue;
+                               }
+                       }
+                       if (cant_sub || !can_use_presub) {
+                               can_remove = 0;
+                               break;
+                       }
+                       presub_replace(s, inst, i);
+                       can_remove = 1;
+               }
+               if(!can_remove)
+                       break;
+               rc_for_all_writes_mask(inst, peephole_scan_write, s);
+               /* If all components of inst_add's destination register have
+                * been written to by subsequent instructions, the original
+                * value of the destination register is no longer valid and
+                * we can't keep doing substitutions. */
+               if (!s->WriteMask){
+                       break;
+               }
+               /* Make this instruction doesn't write to the presubtract source. */
+               if (inst->U.I.DstReg.WriteMask &
+                               src_reads_dst_mask(s->Inst->U.I.SrcReg[1],
+                                                       inst->U.I.DstReg)
+                               || src_reads_dst_mask(s->Inst->U.I.SrcReg[0],
+                                                       inst->U.I.DstReg)
+                               || info->IsFlowControl) {
+                       cant_sub = 1;
+               }
+       }
+       return can_remove;
+}
+
+/* This function assumes that s->Inst->U.I.SrcReg[0] and
+ * s->Inst->U.I.SrcReg[1] aren't both negative. */
+static void presub_replace_add(struct peephole_state *s,
+                                               struct rc_instruction * inst,
+                                               unsigned int src_index)
+{
+       rc_presubtract_op presub_opcode;
+       if (s->Inst->U.I.SrcReg[1].Negate || s->Inst->U.I.SrcReg[0].Negate)
+               presub_opcode = RC_PRESUB_SUB;
+       else
+               presub_opcode = RC_PRESUB_ADD;
+
+       if (s->Inst->U.I.SrcReg[1].Negate) {
+               inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1];
+               inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[0];
+       } else {
+               inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[0];
+               inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[1];
+       }
+       inst->U.I.PreSub.SrcReg[0].Negate = 0;
+       inst->U.I.PreSub.SrcReg[1].Negate = 0;
+       inst->U.I.PreSub.Opcode = presub_opcode;
+       inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index],
+                                               inst->U.I.PreSub.SrcReg[0]);
+       inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
+       inst->U.I.SrcReg[src_index].Index = presub_opcode;
+}
+
+static int is_presub_candidate(struct rc_instruction * inst)
+{
+       const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
+       unsigned int i;
+
+       if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE || inst->U.I.SaturateMode)
+               return 0;
+
+       for(i = 0; i < info->NumSrcRegs; i++) {
+               if (src_reads_dst_mask(inst->U.I.SrcReg[i], inst->U.I.DstReg))
+                       return 0;
+       }
+       return 1;
+}
+
+static int peephole_add_presub_add(
+       struct radeon_compiler * c,
+       struct rc_instruction * inst_add)
+{
+       struct rc_src_register * src0 = NULL;
+       struct rc_src_register * src1 = NULL;
+       unsigned int i;
+       struct peephole_state s;
+
+       if (!is_presub_candidate(inst_add))
+               return 0;
+
+       if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
+               return 0;
+
+       /* src0 and src1 can't have absolute values only one can be negative and they must be all negative or all positive. */
+       for (i = 0; i < 2; i++) {
+               if (inst_add->U.I.SrcReg[i].Abs)
+                       return 0;
+               if ((inst_add->U.I.SrcReg[i].Negate
+                                       & inst_add->U.I.DstReg.WriteMask) ==
+                                               inst_add->U.I.DstReg.WriteMask) {
+                       src0 = &inst_add->U.I.SrcReg[i];
+               } else if (!src1) {
+                       src1 = &inst_add->U.I.SrcReg[i];
+               } else {
+                       src0 = &inst_add->U.I.SrcReg[i];
+               }
+       }
+
+       if (!src1)
+               return 0;
+
+       s.Inst = inst_add;
+       s.WriteMask = inst_add->U.I.DstReg.WriteMask;
+       if (presub_helper(c, &s, RC_PRESUB_ADD, presub_replace_add)) {
+               rc_remove_instruction(inst_add);
+               return 1;
+       }
+       return 0;
+}
+
+static void presub_replace_inv(struct peephole_state * s,
+                                               struct rc_instruction * inst,
+                                               unsigned int src_index)
+{
+       /* We must be careful not to modify s->Inst, since it
+        * is possible it will remain part of the program. 
+        * XXX Maybe pass a struct instead of a pointer for s->Inst.*/
+       inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1];
+       inst->U.I.PreSub.SrcReg[0].Negate = 0;
+       inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
+       inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index],
+                                               inst->U.I.PreSub.SrcReg[0]);
+
+       inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
+       inst->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
+}
+
 /**
  * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
  * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
@@ -531,12 +645,9 @@ static int peephole_add_presub_inv(
        struct rc_instruction * inst_add)
 {
        unsigned int i, swz, mask;
-       unsigned int can_remove = 0;
-       unsigned int cant_sub = 0;
-       struct rc_instruction * inst;
        struct peephole_state s;
 
-       if (inst_add->U.I.SaturateMode)
+       if (!is_presub_candidate(inst_add))
                return 0;
 
        mask = inst_add->U.I.DstReg.WriteMask;
@@ -567,81 +678,7 @@ static int peephole_add_presub_inv(
        s.Inst = inst_add;
        s.WriteMask = inst_add->U.I.DstReg.WriteMask;
 
-       /* For all instructions that read inst_add->U.I.DstReg before it is
-        * written again, use the 1 - src0 presubtact instead. */
-       for(inst = inst_add->Next; inst != &c->Program.Instructions;
-                                                       inst = inst->Next) {
-               const struct rc_opcode_info * info =
-                                       rc_get_opcode_info(inst->U.I.Opcode);
-
-               for(i = 0; i < info->NumSrcRegs; i++) {
-                       if(inst_add->U.I.DstReg.WriteMask !=
-                                       src_reads_dst_mask(inst->U.I.SrcReg[i],
-                                               inst_add->U.I.DstReg)) {
-                               continue;
-                       }
-                       if (cant_sub) {
-                               can_remove = 0;
-                               break;
-                       }
-                       /* XXX: There are some situations where instructions
-                        * with more than 2 src registers can use the
-                        * presubtract select, but to keep things simple we
-                        * will disable presubtract on these instructions for
-                        * now. Note: This if statement should not be pulled
-                        * outside of the loop, because it only applies to
-                        * instructions that could potentially use the
-                        * presubtract source. */
-                       if (info->NumSrcRegs > 2) {
-                               can_remove = 0;
-                               break;
-                       }
-
-                       /* We can't use more than one presubtract value in an
-                        * instruction, unless the two prsubtract operations
-                        * are the same and read from the same registers. */
-                       if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
-                               if (inst->U.I.PreSub.Opcode != RC_PRESUB_INV
-                                       || inst->U.I.PreSub.SrcReg[0].File !=
-                                               inst_add->U.I.SrcReg[1].File
-                                       || inst->U.I.PreSub.SrcReg[0].Index !=
-                                               inst_add->U.I.SrcReg[1].Index) {
-
-                                       can_remove = 0;
-                                       break;
-                               }
-                       }
-                       /* We must be careful not to modify inst_add, since it
-                        * is possible it will remain part of the program. */
-                       inst->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
-                       inst->U.I.PreSub.SrcReg[0].Negate = 0;
-                       inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
-                       inst->U.I.SrcReg[i] = chain_srcregs(inst->U.I.SrcReg[i],
-                                               inst->U.I.PreSub.SrcReg[0]);
-
-                       inst->U.I.SrcReg[i].File = RC_FILE_PRESUB;
-                       inst->U.I.SrcReg[i].Index = RC_PRESUB_INV;
-                       can_remove = 1;
-               }
-               if(!can_remove)
-                       break;
-               rc_for_all_writes_mask(inst, peephole_scan_write, &s);
-               /* If all components of inst_add's destination register have
-                * been written to by subsequent instructions, the original
-                * value of the destination register is no longer valid and
-                * we can't keep doing substitutions. */
-               if (!s.WriteMask){
-                       break;
-               }
-               /* Make this instruction doesn't write to the presubtract source. */
-               if (inst->U.I.DstReg.WriteMask &
-                               src_reads_dst_mask(inst_add->U.I.SrcReg[1],
-                                                       inst->U.I.DstReg)
-                               || info->IsFlowControl) {
-                       cant_sub = 1;
-               }
-       }
-       if(can_remove) {
+       if (presub_helper(c, &s, RC_PRESUB_INV, presub_replace_inv)) {
                rc_remove_instruction(inst_add);
                return 1;
        }
@@ -660,6 +697,8 @@ static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
                if (c->has_presub) {
                        if(peephole_add_presub_inv(c, inst))
                                return 1;
+                       if(peephole_add_presub_add(c, inst))
+                               return 1;
                }
                break;
        default: