r300/compiler: Add peephole optimization for the 'add' presubtract operation
author Tom Stellard <tstellar@gmail.com>
Mon, 30 Aug 2010 15:59:30 +0000 (08:59 -0700)
committer Tom Stellard <tstellar@gmail.com>
Sat, 11 Sep 2010 01:18:09 +0000 (18:18 -0700)
src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c

index e01ba85aa56503f8cade8a6a5cf905e984033800..3ff07d60396dcb898fb43205b5162c15e4c44821 100644 (file)
@@ -38,6 +38,10 @@ struct peephole_state {
        unsigned int WriteMask;
 };
 
+typedef void (*rc_presub_replace_fn)(struct peephole_state *,
+                                               struct rc_instruction *,
+                                               unsigned int);
+
 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
 {
        struct rc_src_register combine;
@@ -516,68 +520,26 @@ static void peephole_scan_write(void * data, struct rc_instruction * inst,
        }
 }
 
-/**
- * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
- * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
- * of the add instruction must have the constatnt 1 swizzle.  This function
- * does not check const registers to see if their value is 1.0, so it should
- * be called after the constant_folding optimization.
- * @return 
- *     0 if the ADD instruction is still part of the program.
- *     1 if the ADD instruction is no longer part of the program.
- */
-static int peephole_add_presub_inv(
+static int presub_helper(
        struct radeon_compiler * c,
-       struct rc_instruction * inst_add)
+       struct peephole_state * s,
+       rc_presubtract_op presub_opcode,
+       rc_presub_replace_fn presub_replace)
 {
-       unsigned int i, swz, mask;
+       struct rc_instruction * inst;
        unsigned int can_remove = 0;
        unsigned int cant_sub = 0;
-       struct rc_instruction * inst;
-       struct peephole_state s;
-
-       if (inst_add->U.I.SaturateMode)
-               return 0;
-
-       mask = inst_add->U.I.DstReg.WriteMask;
-
-       /* Check if src0 is 1. */
-       /* XXX It would be nice to use is_src_uniform_constant here, but that
-        * function only works if the register's file is RC_FILE_NONE */
-       for(i = 0; i < 4; i++ ) {
-               swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
-               if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
-                                               && swz != RC_SWIZZLE_ONE) {
-                       return 0;
-               }
-       }
 
-       /* Check src1. */
-       if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
-                                               inst_add->U.I.DstReg.WriteMask
-               || inst_add->U.I.SrcReg[1].Abs
-               || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
-                       && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
-               || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
-
-               return 0;
-       }
-
-       /* Setup the peephole_state information. */
-       s.Inst = inst_add;
-       s.WriteMask = inst_add->U.I.DstReg.WriteMask;
-
-       /* For all instructions that read inst_add->U.I.DstReg before it is
-        * written again, use the 1 - src0 presubtact instead. */
-       for(inst = inst_add->Next; inst != &c->Program.Instructions;
+       for(inst = s->Inst->Next; inst != &c->Program.Instructions;
                                                        inst = inst->Next) {
+               unsigned int i;
                const struct rc_opcode_info * info =
                                        rc_get_opcode_info(inst->U.I.Opcode);
 
                for(i = 0; i < info->NumSrcRegs; i++) {
-                       if(inst_add->U.I.DstReg.WriteMask !=
+                       if(s->Inst->U.I.DstReg.WriteMask !=
                                        src_reads_dst_mask(inst->U.I.SrcReg[i],
-                                               inst_add->U.I.DstReg)) {
+                                               s->Inst->U.I.DstReg)) {
                                continue;
                        }
                        if (cant_sub) {
@@ -601,47 +563,173 @@ static int peephole_add_presub_inv(
                         * instruction, unless the two prsubtract operations
                         * are the same and read from the same registers. */
                        if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE) {
-                               if (inst->U.I.PreSub.Opcode != RC_PRESUB_INV
+                               if (inst->U.I.PreSub.Opcode != presub_opcode
                                        || inst->U.I.PreSub.SrcReg[0].File !=
-                                               inst_add->U.I.SrcReg[1].File
+                                               s->Inst->U.I.SrcReg[1].File
                                        || inst->U.I.PreSub.SrcReg[0].Index !=
-                                               inst_add->U.I.SrcReg[1].Index) {
+                                               s->Inst->U.I.SrcReg[1].Index) {
 
                                        can_remove = 0;
                                        break;
                                }
                        }
-                       /* We must be careful not to modify inst_add, since it
-                        * is possible it will remain part of the program. */
-                       inst->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
-                       inst->U.I.PreSub.SrcReg[0].Negate = 0;
-                       inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
-                       inst->U.I.SrcReg[i] = chain_srcregs(inst->U.I.SrcReg[i],
-                                               inst->U.I.PreSub.SrcReg[0]);
-
-                       inst->U.I.SrcReg[i].File = RC_FILE_PRESUB;
-                       inst->U.I.SrcReg[i].Index = RC_PRESUB_INV;
+                       presub_replace(s, inst, i);
                        can_remove = 1;
                }
                if(!can_remove)
                        break;
-               rc_for_all_writes_mask(inst, peephole_scan_write, &s);
+               rc_for_all_writes_mask(inst, peephole_scan_write, s);
                /* If all components of inst_add's destination register have
                 * been written to by subsequent instructions, the original
                 * value of the destination register is no longer valid and
                 * we can't keep doing substitutions. */
-               if (!s.WriteMask){
+               if (!s->WriteMask){
                        break;
                }
                /* Make this instruction doesn't write to the presubtract source. */
                if (inst->U.I.DstReg.WriteMask &
-                               src_reads_dst_mask(inst_add->U.I.SrcReg[1],
+                               src_reads_dst_mask(s->Inst->U.I.SrcReg[1],
                                                        inst->U.I.DstReg)
                                || info->IsFlowControl) {
                        cant_sub = 1;
                }
        }
-       if(can_remove) {
+       return can_remove;
+}
+
+static void presub_replace_add(struct peephole_state *s,
+                                               struct rc_instruction * inst,
+                                               unsigned int src_index)
+{
+       inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[0];
+       inst->U.I.PreSub.SrcReg[1] = s->Inst->U.I.SrcReg[1];
+       inst->U.I.PreSub.SrcReg[0].Negate = 0;
+       inst->U.I.PreSub.SrcReg[1].Negate = 0;
+       inst->U.I.PreSub.Opcode = RC_PRESUB_ADD;
+       inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index],
+                                               inst->U.I.PreSub.SrcReg[0]);
+       inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
+       inst->U.I.SrcReg[src_index].Index = RC_PRESUB_ADD;
+}
+
+static int peephole_add_presub_add(
+       struct radeon_compiler * c,
+       struct rc_instruction * inst_add)
+{
+       struct rc_src_register * src0 = NULL;
+       struct rc_src_register * src1 = NULL;
+       unsigned int i;
+       struct peephole_state s;
+
+       if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE)
+               return 0;
+
+       if (inst_add->U.I.SaturateMode)
+               return 0;
+
+       if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
+               return 0;
+
+       /* src0 and src1 can't have absolute values, only one of them can be negative, and each must be either all negative or all positive. */
+       for (i = 0; i < 2; i++) {
+               if (inst_add->U.I.SrcReg[i].Abs)
+                       return 0;
+               if ((inst_add->U.I.SrcReg[i].Negate
+                                       & inst_add->U.I.DstReg.WriteMask) ==
+                                               inst_add->U.I.DstReg.WriteMask) {
+                       src0 = &inst_add->U.I.SrcReg[i];
+               } else if (!src1) {
+                       src1 = &inst_add->U.I.SrcReg[i];
+               } else {
+                       src0 = &inst_add->U.I.SrcReg[i];
+               }
+       }
+
+       if (!src1)
+               return 0;
+
+       /* XXX Only do add for now. */
+       if (src0->Negate)
+               return 0;
+
+       s.Inst = inst_add;
+       s.WriteMask = inst_add->U.I.DstReg.WriteMask;
+       if (presub_helper(c, &s, RC_PRESUB_ADD, presub_replace_add)) {
+               rc_remove_instruction(inst_add);
+               return 1;
+       }
+       return 0;
+}
+
+static void presub_replace_inv(struct peephole_state * s,
+                                               struct rc_instruction * inst,
+                                               unsigned int src_index)
+{
+       /* We must be careful not to modify s->Inst, since it
+        * is possible it will remain part of the program. 
+        * XXX Maybe pass a struct instead of a pointer for s->Inst.*/
+       inst->U.I.PreSub.SrcReg[0] = s->Inst->U.I.SrcReg[1];
+       inst->U.I.PreSub.SrcReg[0].Negate = 0;
+       inst->U.I.PreSub.Opcode = RC_PRESUB_INV;
+       inst->U.I.SrcReg[src_index] = chain_srcregs(inst->U.I.SrcReg[src_index],
+                                               inst->U.I.PreSub.SrcReg[0]);
+
+       inst->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
+       inst->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
+}
+
+/**
+ * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
+ * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
+ * of the add instruction must have the constant 1 swizzle.  This function
+ * does not check const registers to see if their value is 1.0, so it should
+ * be called after the constant_folding optimization.
+ * @return 
+ *     0 if the ADD instruction is still part of the program.
+ *     1 if the ADD instruction is no longer part of the program.
+ */
+static int peephole_add_presub_inv(
+       struct radeon_compiler * c,
+       struct rc_instruction * inst_add)
+{
+       unsigned int i, swz, mask;
+       struct peephole_state s;
+
+       if (inst_add->U.I.PreSub.Opcode != RC_PRESUB_NONE)
+               return 0;
+
+       if (inst_add->U.I.SaturateMode)
+               return 0;
+
+       mask = inst_add->U.I.DstReg.WriteMask;
+
+       /* Check if src0 is 1. */
+       /* XXX It would be nice to use is_src_uniform_constant here, but that
+        * function only works if the register's file is RC_FILE_NONE */
+       for(i = 0; i < 4; i++ ) {
+               swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
+               if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
+                                               && swz != RC_SWIZZLE_ONE) {
+                       return 0;
+               }
+       }
+
+       /* Check src1. */
+       if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
+                                               inst_add->U.I.DstReg.WriteMask
+               || inst_add->U.I.SrcReg[1].Abs
+               || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
+                       && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
+               || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
+
+               return 0;
+       }
+
+       /* Setup the peephole_state information. */
+       s.Inst = inst_add;
+       s.WriteMask = inst_add->U.I.DstReg.WriteMask;
+
+       if (presub_helper(c, &s, RC_PRESUB_INV, presub_replace_inv)) {
                rc_remove_instruction(inst_add);
                return 1;
        }
@@ -660,6 +748,8 @@ static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
                if (c->has_presub) {
                        if(peephole_add_presub_inv(c, inst))
                                return 1;
+                       if(peephole_add_presub_add(c, inst))
+                               return 1;
                }
                break;
        default:
index 32c54fd74bc469021756787527b3bb95a57b41f2..5269d65985138a267162587312ceedc1a14257d2 100644 (file)
@@ -300,6 +300,7 @@ static int destructive_merge_instructions(
                for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
                        unsigned int arg;
                        int free_source;
+                       unsigned int one_way = 0;
                        struct radeon_pair_instruction_source srcp =
                                                alpha->RGB.Src[srcp_src];
                        struct radeon_pair_instruction_source temp;
@@ -307,14 +308,27 @@ static int destructive_merge_instructions(
                         * 3rd arg of 0 means this is not an alpha source. */
                        free_source = rc_pair_alloc_source(rgb, 1, 0,
                                                        srcp.File, srcp.Index);
-                       /* If free_source == srcp_src, then either the
-                        * presubtract source is already in the correct place. */
-                       if (free_source == srcp_src)
-                               continue;
                        /* If free_source < 0 then there are no free source
                         * slots. */
                        if (free_source < 0)
                                return 0;
+
+                       temp = rgb->RGB.Src[srcp_src];
+                       rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source];
+                       /* srcp needs src0 and src1 to be the same */
+                       if (free_source < srcp_src) {
+                               if (!temp.Used)
+                                       continue;
+                               free_source = rc_pair_alloc_source(rgb, 1, 0,
+                                                       srcp.File, srcp.Index);
+                               one_way = 1;
+                       } else {
+                               rgb->RGB.Src[free_source] = temp;
+                       }
+                       /* If free_source == srcp_src, then the presubtract
+                        * source is already in the correct place. */
+                       if (free_source == srcp_src)
+                               continue;
                        /* Shuffle the sources, so we can put the
                         * presubtract source in the correct place. */
                        for (arg = 0; arg < rgb_info->NumSrcRegs; arg++) {
@@ -331,12 +345,11 @@ static int destructive_merge_instructions(
                                /* We need to do this just in case register
                                 * is one of the sources already, but in the
                                 * wrong spot. */
-                               else if(rgb->RGB.Arg[arg].Source == free_source)
+                               else if(rgb->RGB.Arg[arg].Source == free_source
+                                                               && !one_way) {
                                        rgb->RGB.Arg[arg].Source = srcp_src;
+                               }
                        }
-                       temp = rgb->RGB.Src[srcp_src];
-                       rgb->RGB.Src[srcp_src] = rgb->RGB.Src[free_source];
-                       rgb->RGB.Src[free_source] = temp;
                }
        }
 
@@ -352,6 +365,7 @@ static int destructive_merge_instructions(
                for(srcp_src = 0; srcp_src < srcp_regs; srcp_src++) {
                        unsigned int arg;
                        int free_source;
+                       unsigned int one_way = 0;
                        struct radeon_pair_instruction_source srcp =
                                                alpha->Alpha.Src[srcp_src];
                        struct radeon_pair_instruction_source temp;
@@ -359,14 +373,27 @@ static int destructive_merge_instructions(
                         * 3rd arg of 1 means this is an alpha source. */
                        free_source = rc_pair_alloc_source(rgb, 0, 1,
                                                        srcp.File, srcp.Index);
-                       /* If free_source == srcp_src, then either the
-                        * presubtract source is already in the correct place. */
-                       if (free_source == srcp_src)
-                               continue;
                        /* If free_source < 0 then there are no free source
                         * slots. */
                        if (free_source < 0)
                                return 0;
+
+                       temp = rgb->Alpha.Src[srcp_src];
+                       rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source];
+                       /* srcp needs src0 and src1 to be the same. */
+                       if (free_source < srcp_src) {
+                               if (!temp.Used)
+                                       continue;
+                               free_source = rc_pair_alloc_source(rgb, 0, 1,
+                                                       temp.File, temp.Index);
+                               one_way = 1;
+                       } else {
+                               rgb->Alpha.Src[free_source] = temp;
+                       }
+                       /* If free_source == srcp_src, then the presubtract
+                        * source is already in the correct place. */
+                       if (free_source == srcp_src)
+                               continue;
                        /* Shuffle the sources, so we can put the
                         * presubtract source in the correct place. */
                        for(arg = 0; arg < rgb_info->NumSrcRegs; arg++) {
@@ -380,12 +407,11 @@ static int destructive_merge_instructions(
                                }
                                if (rgb->RGB.Arg[arg].Source == srcp_src)
                                        rgb->RGB.Arg[arg].Source = free_source;
-                               else if (rgb->RGB.Arg[arg].Source == free_source)
+                               else if (rgb->RGB.Arg[arg].Source == free_source
+                                                               && !one_way) {
                                        rgb->RGB.Arg[arg].Source = srcp_src;
+                               }
                        }
-                       temp = rgb->Alpha.Src[srcp_src];
-                       rgb->Alpha.Src[srcp_src] = rgb->Alpha.Src[free_source];
-                       rgb->Alpha.Src[free_source] = temp;
                }
        }