radeon_dataflow.c \
radeon_dataflow_deadcode.c \
radeon_dataflow_swizzles.c \
+ radeon_optimize.c \
r3xx_fragprog.c \
r300_fragprog.c \
r300_fragprog_swizzle.c \
debug_program_log(c, "after deadcode");
+ rc_optimize(&c->Base);
+
+ debug_program_log(c, "after dataflow optimize");
+
rc_dataflow_swizzles(&c->Base);
if (c->Base.Error)
return;
#include "radeon_program.h"
-static void reads_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn cb, void * userdata)
{
struct rc_sub_instruction * inst = &fullinst->U.I;
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
refmask &= RC_MASK_XYZW;
- for(unsigned int chan = 0; chan < 4; ++chan) {
- if (GET_BIT(refmask, chan)) {
- cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, chan);
- }
- }
+ if (refmask)
+ cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, refmask);
if (refmask && inst->SrcReg[src].RelAddr)
cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X);
}
}
-static void reads_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
struct rc_pair_instruction * inst = &fullinst->U.P;
unsigned int refmasks[3] = { 0, 0, 0 };
}
for(unsigned int src = 0; src < 3; ++src) {
- if (inst->RGB.Src[src].Used) {
- for(unsigned int chan = 0; chan < 3; ++chan) {
- if (GET_BIT(refmasks[src], chan))
- cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, chan);
- }
- }
+ if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ))
+ cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index,
+ refmasks[src] & RC_MASK_XYZ);
- if (inst->Alpha.Src[src].Used) {
- if (GET_BIT(refmasks[src], 3))
- cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 3);
- }
+ if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W))
+ cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W);
}
}
/**
- * Calls a callback function for all sourced register channels.
+ * Calls a callback function for all register reads.
*
- * This is conservative, i.e. channels may be called multiple times,
- * and the writemask of the instruction is not taken into account.
+ * This is conservative, i.e. if the same register is referenced multiple times,
+ * the callback may also be called multiple times.
+ * Also, the writemask of the instruction is not taken into account.
*/
-void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata)
+void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
{
if (inst->Type == RC_INSTRUCTION_NORMAL) {
reads_normal(inst, cb, userdata);
-static void writes_normal(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
struct rc_sub_instruction * inst = &fullinst->U.I;
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
- if (opcode->HasDstReg) {
- for(unsigned int chan = 0; chan < 4; ++chan) {
- if (GET_BIT(inst->DstReg.WriteMask, chan))
- cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, chan);
- }
- }
+ if (opcode->HasDstReg && inst->DstReg.WriteMask)
+ cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask);
if (inst->WriteALUResult)
- cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0);
+ cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);
}
-static void writes_pair(struct rc_instruction * fullinst, rc_read_write_fn cb, void * userdata)
+static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
{
struct rc_pair_instruction * inst = &fullinst->U.P;
- for(unsigned int chan = 0; chan < 3; ++chan) {
- if (GET_BIT(inst->RGB.WriteMask, chan))
- cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, chan);
- }
+ if (inst->RGB.WriteMask)
+ cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask);
if (inst->Alpha.WriteMask)
- cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, 3);
+ cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W);
if (inst->WriteALUResult)
- cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, 0);
+ cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);
}
/**
- * Calls a callback function for all written register channels.
+ * Calls a callback function for all register writes in the instruction,
+ * reporting writemasks to the callback function.
*
* \warning Does not report output registers for paired instructions!
*/
-void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata)
+void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
{
if (inst->Type == RC_INSTRUCTION_NORMAL) {
writes_normal(inst, cb, userdata);
}
+struct mask_to_chan_data {
+ void * UserData;
+ rc_read_write_chan_fn Fn;
+};
+
+static void mask_to_chan_cb(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ struct mask_to_chan_data * d = data;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ if (GET_BIT(mask, chan))
+ d->Fn(d->UserData, inst, file, index, chan);
+ }
+}
+
+/**
+ * Calls a callback function for all sourced register channels.
+ *
+ * This is conservative, i.e. channels may be called multiple times,
+ * and the writemask of the instruction is not taken into account.
+ */
+void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
+{
+ struct mask_to_chan_data d;
+ d.UserData = userdata;
+ d.Fn = cb;
+ rc_for_all_reads_mask(inst, &mask_to_chan_cb, &d);
+}
+
+/**
+ * Calls a callback function for all written register channels.
+ *
+ * \warning Does not report output registers for paired instructions!
+ */
+void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
+{
+ struct mask_to_chan_data d;
+ d.UserData = userdata;
+ d.Fn = cb;
+ rc_for_all_writes_mask(inst, &mask_to_chan_cb, &d);
+}
+
static void remap_normal_instruction(struct rc_instruction * fullinst,
rc_remap_register_fn cb, void * userdata)
{
* Help analyze and modify the register accesses of instructions.
*/
/*@{*/
-typedef void (*rc_read_write_fn)(void * userdata, struct rc_instruction * inst,
+typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int chan);
-void rc_for_all_reads(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata);
-void rc_for_all_writes(struct rc_instruction * inst, rc_read_write_fn cb, void * userdata);
+void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
+void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
+
+typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask);
+void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
+void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst,
rc_register_file * pfile, unsigned int * pindex);
void rc_dataflow_swizzles(struct radeon_compiler * c);
/*@}*/
+void rc_optimize(struct radeon_compiler * c);
+
#endif /* RADEON_DATAFLOW_H */
sap.Proxies = proxies;
for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
- rc_for_all_writes(inst, scan_write, &sap);
+ rc_for_all_writes_mask(inst, scan_write, &sap);
rc_remap_registers(inst, remap_proxy_function, &sap);
}
--- /dev/null
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+
+
+static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
+{
+ struct rc_src_register combine;
+ combine.File = inner.File;
+ combine.Index = inner.Index;
+ combine.RelAddr = inner.RelAddr;
+ if (outer.Abs) {
+ combine.Abs = 1;
+ combine.Negate = outer.Negate;
+ } else {
+ combine.Abs = inner.Abs;
+ combine.Negate = 0;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(outer.Swizzle, chan);
+ if (swz < 4)
+ combine.Negate |= GET_BIT(inner.Negate, swz) << chan;
+ }
+ combine.Negate ^= outer.Negate;
+ }
+ combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
+ return combine;
+}
+
+struct peephole_state {
+ struct radeon_compiler * C;
+ struct rc_instruction * Mov;
+ unsigned int Conflict:1;
+
+ /** Whether Mov's source has been clobbered */
+ unsigned int SourceClobbered:1;
+
+ /** Which components of Mov's destination register are still from that Mov? */
+ unsigned int MovMask:4;
+
+ /** Which components of Mov's destination register are clearly *not* from that Mov */
+ unsigned int DefinedMask:4;
+
+ /** Which components of Mov's source register are sourced */
+ unsigned int SourcedMask:4;
+
+ /** Branch depth beyond Mov; negative value indicates we left the Mov's block */
+ int BranchDepth;
+};
+
+static void peephole_scan_read(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ struct peephole_state * s = data;
+
+ if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index)
+ return;
+
+ if ((mask & s->MovMask) == mask) {
+ if (s->SourceClobbered) {
+ s->Conflict = 1;
+ }
+ } else if ((mask & s->DefinedMask) == mask) {
+ /* read from something entirely written by other instruction: this is okay */
+ } else {
+ /* read from component combination that is not well-defined without
+ * the MOV: cannot remove it */
+ s->Conflict = 1;
+ }
+}
+
+static void peephole_scan_write(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ struct peephole_state * s = data;
+
+ if (s->BranchDepth < 0)
+ return;
+
+ if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) {
+ s->MovMask &= ~mask;
+ if (s->BranchDepth == 0)
+ s->DefinedMask |= mask;
+ else
+ s->DefinedMask &= ~mask;
+ } else if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) {
+ if (mask & s->SourcedMask)
+ s->SourceClobbered = 1;
+ } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) {
+ s->SourceClobbered = 1;
+ }
+}
+
+static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mov)
+{
+ struct peephole_state s;
+
+ if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || inst_mov->U.I.WriteALUResult)
+ return;
+
+ memset(&s, 0, sizeof(s));
+ s.C = c;
+ s.Mov = inst_mov;
+ s.MovMask = inst_mov->U.I.DstReg.WriteMask;
+ s.DefinedMask = RC_MASK_XYZW & ~s.MovMask;
+
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan);
+ s.SourcedMask |= (1 << swz) & RC_MASK_XYZW;
+ }
+
+ /* 1st pass: Check whether all subsequent readers can be changed */
+ for(struct rc_instruction * inst = inst_mov->Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ rc_for_all_reads_mask(inst, peephole_scan_read, &s);
+ rc_for_all_writes_mask(inst, peephole_scan_write, &s);
+ if (s.Conflict)
+ return;
+
+ if (s.BranchDepth >= 0) {
+ if (inst->U.I.Opcode == RC_OPCODE_IF) {
+ s.BranchDepth++;
+ } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+ s.BranchDepth--;
+ if (s.BranchDepth < 0) {
+ s.DefinedMask &= ~s.MovMask;
+ s.MovMask = 0;
+ }
+ }
+ }
+ }
+
+ if (s.Conflict)
+ return;
+
+ /* 2nd pass: We can satisfy all readers, so switch them over all at once */
+ s.MovMask = inst_mov->U.I.DstReg.WriteMask;
+ s.BranchDepth = 0;
+
+ for(struct rc_instruction * inst = inst_mov->Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+ if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY &&
+ inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) {
+ unsigned int refmask = 0;
+
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
+ refmask |= (1 << swz) & RC_MASK_XYZW;
+ }
+
+ if ((refmask & s.MovMask) == refmask)
+ inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]);
+ }
+ }
+
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY &&
+ inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) {
+ s.MovMask &= ~inst->U.I.DstReg.WriteMask;
+ }
+ }
+
+ if (s.BranchDepth >= 0) {
+ if (inst->U.I.Opcode == RC_OPCODE_IF) {
+ s.BranchDepth++;
+ } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+ s.BranchDepth--;
+ if (s.BranchDepth < 0)
+ break; /* no more readers after this point */
+ }
+ }
+ }
+
+ /* Finally, remove the original MOV instruction */
+ rc_remove_instruction(inst_mov);
+}
+
+void rc_optimize(struct radeon_compiler * c)
+{
+ struct rc_instruction * inst = c->Program.Instructions.Next;
+ while(inst != &c->Program.Instructions) {
+ struct rc_instruction * cur = inst;
+ inst = inst->Next;
+
+ if (cur->U.I.Opcode == RC_OPCODE_MOV)
+ peephole(c, cur);
+ }
+}
}
static void scan_callback(void * data, struct rc_instruction * inst,
- rc_register_file file, unsigned int index, unsigned int chan)
+ rc_register_file file, unsigned int index, unsigned int mask)
{
struct regalloc_state * s = data;
struct register_info * reg;
for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
inst != &s->C->Program.Instructions;
inst = inst->Next) {
- rc_for_all_reads(inst, scan_callback, s);
- rc_for_all_writes(inst, scan_callback, s);
+ rc_for_all_reads_mask(inst, scan_callback, s);
+ rc_for_all_writes_mask(inst, scan_callback, s);
}
}
* counter-intuitive, to account for the case where an
* instruction writes to the same register as it reads
* from. */
- rc_for_all_writes(inst, &scan_write, &s);
- rc_for_all_reads(inst, &scan_read, &s);
+ rc_for_all_writes_chan(inst, &scan_write, &s);
+ rc_for_all_reads_chan(inst, &scan_read, &s);
DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies);