r300/compiler: Implement simple peephole optimizer
author Nicolai Hähnle <nhaehnle@gmail.com>
Sun, 11 Oct 2009 14:13:02 +0000 (16:13 +0200)
committer Marek Olšák <maraeo@gmail.com>
Sun, 16 May 2010 18:15:53 +0000 (20:15 +0200)
Signed-off-by: Nicolai Hähnle <nhaehnle@gmail.com>
src/mesa/drivers/dri/r300/compiler/Makefile
src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c
src/mesa/drivers/dri/r300/compiler/radeon_optimize.c [new file with mode: 0644]
src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c

index e432afc3d41eaf002f0c5a09fb334e09d295d8f4..34d22b45591117f00751ce53910e2d77f2104f59 100644 (file)
@@ -21,6 +21,7 @@ C_SOURCES = \
                radeon_dataflow.c \
                radeon_dataflow_deadcode.c \
                radeon_dataflow_swizzles.c \
+               radeon_optimize.c \
                r3xx_fragprog.c \
                r300_fragprog.c \
                r300_fragprog_swizzle.c \
index 25bf373b6fde682c7211785790f307c21eeec016..3e88ccbc46de9c8ef791d196bc22f2d302b8214a 100644 (file)
@@ -152,6 +152,10 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
 
        debug_program_log(c, "after deadcode");
 
+       rc_optimize(&c->Base);
+
+       debug_program_log(c, "after dataflow optimize");
+
        rc_dataflow_swizzles(&c->Base);
        if (c->Base.Error)
                return;
index 16e2f3a218143ff19b97ee611c2509d7cc0c9424..0e6c62541fa46f39bee641fef2d965f745fbe290 100644 (file)
@@ -30,7 +30,7 @@
 #include "radeon_program.h"
 
 
-static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn cb, void * userdata)
 {
        struct rc_sub_instruction * inst = &fullinst->U.I;
        const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
@@ -46,18 +46,15 @@ static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb,
 
                refmask &= RC_MASK_XYZW;
 
-               for(unsigned int chan = 0; chan < 4; ++chan) {
-                       if (GET_BIT(refmask, chan)) {
-                               cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, chan);
-                       }
-               }
+               if (refmask)
+                       cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, refmask);
 
                if (refmask && inst->SrcReg[src].RelAddr)
                        cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X);
        }
 }
 
-static void reads_pair(struct rc_instruction * fullinst,  rc_read_write_fn cb, void * userdata)
+static void reads_pair(struct rc_instruction * fullinst,  rc_read_write_mask_fn cb, void * userdata)
 {
        struct rc_pair_instruction * inst = &fullinst->U.P;
        unsigned int refmasks[3] = { 0, 0, 0 };
@@ -84,27 +81,23 @@ static void reads_pair(struct rc_instruction * fullinst,  rc_read_write_fn cb, v
        }
 
        for(unsigned int src = 0; src < 3; ++src) {
-               if (inst->RGB.Src[src].Used) {
-                       for(unsigned int chan = 0; chan < 3; ++chan) {
-                               if (GET_BIT(refmasks[src], chan))
-                                       cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, chan);
-                       }
-               }
+               if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ))
+                       cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index,
+                          refmasks[src] & RC_MASK_XYZ);
 
-               if (inst->Alpha.Src[src].Used) {
-                       if (GET_BIT(refmasks[src], 3))
-                               cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 3);
-               }
+               if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W))
+                       cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W);
        }
 }
 
 /**
- * Calls a callback function for all sourced register channels.
+ * Calls a callback function for all register reads.
  *
- * This is conservative, i.e. channels may be called multiple times,
- * and the writemask of the instruction is not taken into account.
+ * This is conservative, i.e. if the same register is referenced multiple times,
+ * the callback may also be called multiple times.
+ * Also, the writemask of the instruction is not taken into account.
  */
-void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata)
+void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
 {
        if (inst->Type == RC_INSTRUCTION_NORMAL) {
                reads_normal(inst, cb, userdata);
@@ -115,44 +108,39 @@ void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void *
 
 
 
-static void writes_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
 {
        struct rc_sub_instruction * inst = &fullinst->U.I;
        const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
 
-       if (opcode->HasDstReg) {
-               for(unsigned int chan = 0; chan < 4; ++chan) {
-                       if (GET_BIT(inst->DstReg.WriteMask, chan))
-                               cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, chan);
-               }
-       }
+       if (opcode->HasDstReg && inst->DstReg.WriteMask)
+               cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask);
 
        if (inst->WriteALUResult)
-               cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0);
+               cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);
 }
 
-static void writes_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
 {
        struct rc_pair_instruction * inst = &fullinst->U.P;
 
-       for(unsigned int chan = 0; chan < 3; ++chan) {
-               if (GET_BIT(inst->RGB.WriteMask, chan))
-                       cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, chan);
-       }
+       if (inst->RGB.WriteMask)
+               cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask);
 
        if (inst->Alpha.WriteMask)
-               cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, 3);
+               cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W);
 
        if (inst->WriteALUResult)
-               cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0);
+               cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);
 }
 
 /**
- * Calls a callback function for all written register channels.
+ * Calls a callback function for all register writes in the instruction,
+ * reporting writemasks to the callback function.
  *
  * \warning Does not report output registers for paired instructions!
  */
-void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata)
+void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
 {
        if (inst->Type == RC_INSTRUCTION_NORMAL) {
                writes_normal(inst, cb, userdata);
@@ -162,6 +150,48 @@ void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void *
 }
 
 
+struct mask_to_chan_data { /* Adapter state that lets a per-channel callback be driven by the mask-based walkers. */
+       void * UserData; /* Caller's opaque pointer; handed back to Fn as its first argument. */
+       rc_read_write_chan_fn Fn; /* Per-channel callback that mask_to_chan_cb invokes for each selected channel. */
+};
+
+static void mask_to_chan_cb(void * data, struct rc_instruction * inst, /* Mask-style callback that fans one (file, index, mask) report out into per-channel calls. */
+               rc_register_file file, unsigned int index, unsigned int mask)
+{
+       struct mask_to_chan_data * d = data; /* data is the mask_to_chan_data filled in by rc_for_all_reads_chan / rc_for_all_writes_chan. */
+       for(unsigned int chan = 0; chan < 4; ++chan) { /* Only channels 0..3 of the mask are examined. */
+               if (GET_BIT(mask, chan))
+                       d->Fn(d->UserData, inst, file, index, chan); /* Forward with the channel number in place of the mask. */
+       }
+}
+
+/**
+ * Calls a per-channel callback for every register channel the instruction sources.
+ *
+ * This is conservative, i.e. the callback may be invoked more than once for the
+ * same channel, and the writemask of the instruction is not taken into account.
+ */
+void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
+{
+       struct mask_to_chan_data d; /* Lives on the stack; only needs to outlive the call below. */
+       d.UserData = userdata;
+       d.Fn = cb;
+       rc_for_all_reads_mask(inst, &mask_to_chan_cb, &d); /* Reuse the mask-based walk; mask_to_chan_cb splits each reported mask into channels. */
+}
+
+/**
+ * Calls a per-channel callback for every register channel the instruction writes.
+ *
+ * \warning Does not report output registers for paired instructions!
+ */
+void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
+{
+       struct mask_to_chan_data d; /* Lives on the stack; only needs to outlive the call below. */
+       d.UserData = userdata;
+       d.Fn = cb;
+       rc_for_all_writes_mask(inst, &mask_to_chan_cb, &d); /* Reuse the mask-based walk; mask_to_chan_cb splits each reported mask into channels. */
+}
+
 static void remap_normal_instruction(struct rc_instruction * fullinst,
                rc_remap_register_fn cb, void * userdata)
 {
index 62cda20eea686c67ee251f9c6d397ef06e4dad6e..60a6e192a9f7371aebcd8f34b74883065e28f34b 100644 (file)
@@ -39,10 +39,15 @@ struct rc_swizzle_caps;
  * Help analyze and modify the register accesses of instructions.
  */
 /*@{*/
-typedef void (*rc_read_write_fn)(void * userdata, struct rc_instruction * inst,
+typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst,
                        rc_register_file file, unsigned int index, unsigned int chan);
-void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata);
-void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata);
+void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
+void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
+
+typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst,
+                       rc_register_file file, unsigned int index, unsigned int mask);
+void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
+void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
 
 typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst,
                        rc_register_file * pfile, unsigned int * pindex);
@@ -60,4 +65,6 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f
 void rc_dataflow_swizzles(struct radeon_compiler * c);
 /*@}*/
 
+void rc_optimize(struct radeon_compiler * c);
+
 #endif /* RADEON_DATAFLOW_H */
index d889612f4f4eb244b8b20c47316f49af642daf1c..863654cf6850651262f5cbfe72f6842063d25b56 100644 (file)
@@ -150,7 +150,7 @@ static void allocate_and_insert_proxies(struct emulate_branch_state * s,
        sap.Proxies = proxies;
 
        for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
-               rc_for_all_writes(inst, scan_write, &sap);
+               rc_for_all_writes_mask(inst, scan_write, &sap);
                rc_remap_registers(inst, remap_proxy_function, &sap);
        }
 
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
new file mode 100644 (file)
index 0000000..80e3eea
--- /dev/null
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+
+
+static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) /* Folds two source operands into one: the result reads inner's register with outer's modifiers applied on top. */
+{
+       struct rc_src_register combine;
+       combine.File = inner.File; /* The register actually read is always the inner one. */
+       combine.Index = inner.Index;
+       combine.RelAddr = inner.RelAddr;
+       if (outer.Abs) {
+               combine.Abs = 1; /* An outer absolute value makes the inner Abs/Negate irrelevant; only outer's negate survives. */
+               combine.Negate = outer.Negate;
+       } else {
+               combine.Abs = inner.Abs;
+               combine.Negate = 0;
+               for(unsigned int chan = 0; chan < 4; ++chan) { /* Move each inner negate bit to the channel where outer's swizzle places that component. */
+                       unsigned int swz = GET_SWZ(outer.Swizzle, chan);
+                       if (swz < 4) /* Selectors >= 4 do not name one of inner's four channels (presumably constant/unused selectors -- confirm), so no inner negate bit is inherited. */
+                               combine.Negate |= GET_BIT(inner.Negate, swz) << chan;
+               }
+               combine.Negate ^= outer.Negate; /* XOR so that negating an already negated channel cancels out. */
+       }
+       combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); /* combine_swizzles is defined outside this file; presumably it composes the two swizzles -- confirm its argument order there. */
+       return combine;
+}
+
+struct peephole_state { /* Scan state shared between peephole() and its read/write callbacks while one MOV is examined. */
+       struct radeon_compiler * C; /* Compiler instance, stored by peephole(). */
+       struct rc_instruction * Mov; /* The MOV instruction that is a candidate for removal. */
+       unsigned int Conflict:1; /* Set by peephole_scan_read when a reader cannot be redirected; makes peephole() give up. */
+
+       /** Whether Mov's source (or the address register it is indexed by) has been overwritten since Mov */
+       unsigned int SourceClobbered:1;
+
+       /** Which components of Mov's destination register are still from that Mov? */
+       unsigned int MovMask:4;
+
+       /** Which components of Mov's destination register are clearly *not* from that Mov */
+       unsigned int DefinedMask:4;
+
+       /** Which components of Mov's source register are selected by Mov's swizzle */
+       unsigned int SourcedMask:4;
+
+       /** Branch depth beyond Mov; negative value indicates we left the Mov's block */
+       int BranchDepth;
+};
+
+static void peephole_scan_read(void * data, struct rc_instruction * inst, /* Read callback for pass 1: decides whether this read of Mov's destination could be redirected to Mov's source. */
+               rc_register_file file, unsigned int index, unsigned int mask)
+{
+       struct peephole_state * s = data;
+
+       if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index) /* Reads of any other register are irrelevant to this MOV. */
+               return;
+
+       if ((mask & s->MovMask) == mask) { /* Every component read still holds the value written by Mov. */
+               if (s->SourceClobbered) { /* ...but Mov's source no longer holds that value, so the read cannot be redirected. */
+                       s->Conflict = 1;
+               }
+       } else if ((mask & s->DefinedMask) == mask) {
+               /* read from something entirely written by other instruction: this is okay */
+       } else {
+               /* read from component combination that is not well-defined without
+                * the MOV: cannot remove it */
+               s->Conflict = 1;
+       }
+}
+
+static void peephole_scan_write(void * data, struct rc_instruction * inst, /* Write callback for pass 1: updates the masks and SourceClobbered as later instructions overwrite Mov's destination or source. */
+               rc_register_file file, unsigned int index, unsigned int mask)
+{
+       struct peephole_state * s = data;
+
+       if (s->BranchDepth < 0) /* Once the scan has left Mov's block, writes no longer update the state. */
+               return;
+
+       if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) {
+               s->MovMask &= ~mask; /* These components no longer (certainly) come from Mov. */
+               if (s->BranchDepth == 0)
+                       s->DefinedMask |= mask; /* Write at Mov's own nesting level: the components now have another definition. */
+               else
+                       s->DefinedMask &= ~mask; /* Write inside a nested branch: the components are neither Mov's nor clearly redefined. */
+       } else if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) {
+               if (mask & s->SourcedMask) /* Only writes to components that Mov actually selects matter. */
+                       s->SourceClobbered = 1;
+       } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) {
+               s->SourceClobbered = 1; /* A relatively addressed source changes meaning when the address register is rewritten. */
+       }
+}
+
+static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mov) /* Tries to eliminate inst_mov by making later readers of its destination read its source directly; does nothing if any reader cannot be switched. */
+{
+       struct peephole_state s;
+
+       if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || inst_mov->U.I.WriteALUResult) /* Only MOVs into temporaries with no ALU-result side effect are candidates. */
+               return;
+
+       memset(&s, 0, sizeof(s)); /* Clears Conflict, SourceClobbered, the masks and BranchDepth. */
+       s.C = c;
+       s.Mov = inst_mov;
+       s.MovMask = inst_mov->U.I.DstReg.WriteMask;
+       s.DefinedMask = RC_MASK_XYZW & ~s.MovMask; /* Components Mov does not write are treated as defined elsewhere. */
+
+       for(unsigned int chan = 0; chan < 4; ++chan) { /* Collect the source components selected by Mov's swizzle. */
+               unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan);
+               s.SourcedMask |= (1 << swz) & RC_MASK_XYZW; /* Selectors whose bit lies outside RC_MASK_XYZW contribute nothing. */
+       }
+
+       /* 1st pass: Check whether all subsequent readers can be changed */
+       for(struct rc_instruction * inst = inst_mov->Next;
+           inst != &c->Program.Instructions;
+           inst = inst->Next) {
+               rc_for_all_reads_mask(inst, peephole_scan_read, &s); /* Reads are scanned before writes, so an instruction is judged against the state preceding its own write. */
+               rc_for_all_writes_mask(inst, peephole_scan_write, &s);
+               if (s.Conflict)
+                       return;
+
+               if (s.BranchDepth >= 0) { /* NOTE(review): both passes read inst->U.I without checking inst->Type; presumably only normal instructions exist at this stage -- confirm. */
+                       if (inst->U.I.Opcode == RC_OPCODE_IF) {
+                               s.BranchDepth++;
+                       } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) { /* NOTE(review): RC_OPCODE_ELSE is not tracked here; confirm a MOV inside an IF branch cannot leak into the ELSE branch. */
+                               s.BranchDepth--;
+                               if (s.BranchDepth < 0) { /* Left Mov's block: its components become neither Mov's nor defined, so any later read of them raises Conflict. */
+                                       s.DefinedMask &= ~s.MovMask;
+                                       s.MovMask = 0;
+                               }
+                       }
+               }
+       }
+
+       if (s.Conflict) /* The loop above already returns on Conflict; this re-check is defensive. */
+               return;
+
+       /* 2nd pass: We can satisfy all readers, so switch them over all at once */
+       s.MovMask = inst_mov->U.I.DstReg.WriteMask; /* Restart the tracking from the state right after Mov. */
+       s.BranchDepth = 0;
+
+       for(struct rc_instruction * inst = inst_mov->Next;
+           inst != &c->Program.Instructions;
+           inst = inst->Next) {
+               const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+               for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+                       if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY &&
+                           inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) {
+                               unsigned int refmask = 0; /* Components of Mov's destination that this operand's swizzle selects. */
+
+                               for(unsigned int chan = 0; chan < 4; ++chan) {
+                                       unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
+                                       refmask |= (1 << swz) & RC_MASK_XYZW;
+                               }
+
+                               if ((refmask & s.MovMask) == refmask) /* Rewrite only when every selected component still comes from Mov. */
+                                       inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]);
+                       }
+               }
+
+               if (opcode->HasDstReg) {
+                       if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY &&
+                           inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) {
+                               s.MovMask &= ~inst->U.I.DstReg.WriteMask; /* Overwritten components stop being Mov's for later readers. */
+                       }
+               }
+
+               if (s.BranchDepth >= 0) {
+                       if (inst->U.I.Opcode == RC_OPCODE_IF) {
+                               s.BranchDepth++;
+                       } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+                               s.BranchDepth--;
+                               if (s.BranchDepth < 0)
+                                       break; /* no more readers after this point */
+                       }
+               }
+       }
+
+       /* Finally, remove the original MOV instruction */
+       rc_remove_instruction(inst_mov);
+}
+
+void rc_optimize(struct radeon_compiler * c) /* Walks the whole instruction list and runs the MOV peephole on every RC_OPCODE_MOV. */
+{
+       struct rc_instruction * inst = c->Program.Instructions.Next;
+       while(inst != &c->Program.Instructions) { /* The list is circular; the Instructions node itself marks the end. */
+               struct rc_instruction * cur = inst;
+               inst = inst->Next; /* Advance first: peephole() may remove cur from the list. */
+
+               if (cur->U.I.Opcode == RC_OPCODE_MOV)
+                       peephole(c, cur);
+       }
+}
index fdfee86701446721864be30df0b06ec561032ad7..8a912da461302f8da66753e58ff502b221d9a578 100644 (file)
@@ -159,7 +159,7 @@ static int try_add_live_intervals(struct regalloc_state * s,
 }
 
 static void scan_callback(void * data, struct rc_instruction * inst,
-               rc_register_file file, unsigned int index, unsigned int chan)
+               rc_register_file file, unsigned int index, unsigned int mask)
 {
        struct regalloc_state * s = data;
        struct register_info * reg;
@@ -191,8 +191,8 @@ static void compute_live_intervals(struct regalloc_state * s)
        for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
            inst != &s->C->Program.Instructions;
            inst = inst->Next) {
-               rc_for_all_reads(inst, scan_callback, s);
-               rc_for_all_writes(inst, scan_callback, s);
+               rc_for_all_reads_mask(inst, scan_callback, s);
+               rc_for_all_writes_mask(inst, scan_callback, s);
        }
 }
 
index df67aafe02884e739f376e3e2e48ef1a9bd85770..a279549ff8940fdcaff9faf19aaa850c46901f05 100644 (file)
@@ -448,8 +448,8 @@ static void schedule_block(struct r300_fragment_program_compiler * c,
                 * counter-intuitive, to account for the case where an
                 * instruction writes to the same register as it reads
                 * from. */
-               rc_for_all_writes(inst, &scan_write, &s);
-               rc_for_all_reads(inst, &scan_read, &s);
+               rc_for_all_writes_chan(inst, &scan_write, &s);
+               rc_for_all_reads_chan(inst, &scan_read, &s);
 
                DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies);