C_SOURCES = \
radeon_code.c \
radeon_compiler.c \
+ radeon_compiler_util.c \
radeon_emulate_branches.c \
radeon_emulate_loops.c \
radeon_program.c \
source = [
'radeon_code.c',
'radeon_compiler.c',
+ 'radeon_compiler_util.c',
'radeon_program.c',
'radeon_program_print.c',
'radeon_opcodes.c',
--- /dev/null
+/*
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ */
+
+#include "radeon_compiler_util.h"
+
+/**
+ * Convert a swizzle into the mask of channels it selects.
+ *
+ * Each of the four swizzle slots contributes the bit numbered by its
+ * selector (as extracted with GET_SWZ); the result is then restricted to
+ * RC_MASK_XYZW, so any selector bit outside that mask is dropped.
+ *
+ * @param swz The swizzle to examine.
+ * @return The union of the selected channels, limited to RC_MASK_XYZW.
+ */
+unsigned int rc_swizzle_to_writemask(unsigned int swz)
+{
+ unsigned int mask = 0;
+ unsigned int i;
+
+ for(i = 0; i < 4; i++) {
+ mask |= 1 << GET_SWZ(swz, i);
+ }
+ mask &= RC_MASK_XYZW;
+
+ return mask;
+}
+
+/**
+ * Compute which channels of a destination register are read by a source.
+ *
+ * @param src_file, src_idx, src_swz File, index and swizzle of the source.
+ * @param dst_file, dst_idx, dst_mask File, index and write mask of the
+ * destination.
+ * @return RC_MASK_NONE if the source and destination differ in file or
+ * index; otherwise the channels of dst_mask that src_swz selects.
+ */
+unsigned int rc_src_reads_dst_mask(
+ rc_register_file src_file,
+ unsigned int src_idx,
+ unsigned int src_swz,
+ rc_register_file dst_file,
+ unsigned int dst_idx,
+ unsigned int dst_mask)
+{
+ if (src_file != dst_file || src_idx != dst_idx) {
+ return RC_MASK_NONE;
+ }
+ return dst_mask & rc_swizzle_to_writemask(src_swz);
+}
--- /dev/null
+#include "radeon_program_constants.h"
+
+#ifndef RADEON_PROGRAM_UTIL_H
+#define RADEON_PROGRAM_UTIL_H
+
+/* Returns the mask of channels selected by the swizzle swz. */
+unsigned int rc_swizzle_to_writemask(unsigned int swz);
+
+/* Returns the channels of dst_mask that are read through src_swz when the
+ * source and destination name the same register (same file and index). */
+unsigned int rc_src_reads_dst_mask(
+ rc_register_file src_file,
+ unsigned int src_idx,
+ unsigned int src_swz,
+ rc_register_file dst_file,
+ unsigned int dst_idx,
+ unsigned int dst_mask);
+
+#endif /* RADEON_PROGRAM_UTIL_H */
/*
* Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
#include "radeon_dataflow.h"
+#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
#include "radeon_program.h"
struct read_write_mask_data {
else
remap_pair_instruction(inst, cb, userdata);
}
+
+/**
+ * Look up the opcode info of inst and report whether it is flow control.
+ * For instructions that are not RC_INSTRUCTION_NORMAL the RGB opcode of the
+ * pair instruction is examined.
+ *
+ * @return RC_OPCODE_NOP if inst is not a flow control instruction.
+ * @return The opcode of inst if it is a flow control instruction.
+ */
+static rc_opcode get_flow_control_inst(struct rc_instruction * inst)
+{
+ const struct rc_opcode_info * info;
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ info = rc_get_opcode_info(inst->U.I.Opcode);
+ } else {
+ info = rc_get_opcode_info(inst->U.P.RGB.Opcode);
+ /* A flow control instruction shouldn't have an alpha
+ * instruction. */
+ assert(!info->IsFlowControl ||
+ inst->U.P.Alpha.Opcode == RC_OPCODE_NOP);
+ }
+
+ if (info->IsFlowControl)
+ return info->Opcode;
+ else
+ return RC_OPCODE_NOP;
+
+}
+
+/**
+ * State handed to the read/write callbacks while rc_get_readers_normal()
+ * walks the instructions that follow the writer.
+ */
+struct get_readers_callback_data {
+ struct radeon_compiler * C; /* Compiler whose memory pool backs the reader list. */
+ struct rc_reader_data * ReaderData; /* Result structure being filled in. */
+ rc_read_src_fn ReadCB; /* Caller's callback, invoked for each accepted reader. */
+ rc_read_write_mask_fn WriteCB; /* Caller's callback, invoked for every write seen. */
+ unsigned int AliveWriteMask; /* Writer's channels not yet overwritten. */
+};
+
+/**
+ * Append one entry to data->Readers.
+ *
+ * Room for one more element is reserved from pool before the entry is
+ * written, and data->ReaderCount is incremented.
+ *
+ * @param pool Memory pool the Readers array is reserved from.
+ * @param data Reader list to append to.
+ * @param inst Instruction that performs the read.
+ * @param mask Stored in the new entry's WriteMask field.
+ * @param src Source register of inst that performs the read.
+ */
+static void add_reader(
+ struct memory_pool * pool,
+ struct rc_reader_data * data,
+ struct rc_instruction * inst,
+ unsigned int mask,
+ struct rc_src_register * src)
+{
+ struct rc_reader * new;
+ memory_pool_array_reserve(pool, struct rc_reader, data->Readers,
+ data->ReaderCount, data->ReadersReserved, 1);
+ new = &data->Readers[data->ReaderCount++];
+ new->Inst = inst;
+ new->WriteMask = mask;
+ new->Src = src;
+}
+
+/**
+ * This function is used by rc_get_readers_normal() to determine whether inst
+ * is a reader of userdata->ReaderData->Writer
+ *
+ * A source that reads still-alive channels of the writer's destination is
+ * passed to the caller's ReadCB and, unless Abort has been set by then,
+ * recorded in the reader list. Abort is set instead when AbortOnRead is
+ * active or when the source also reads channels that are not alive.
+ *
+ * @param userdata A struct get_readers_callback_data.
+ * @param inst The instruction that src belongs to.
+ * @param src The source register being examined.
+ */
+static void get_readers_normal_read_callback(
+ void * userdata,
+ struct rc_instruction * inst,
+ struct rc_src_register * src)
+{
+ struct get_readers_callback_data * d = userdata;
+ unsigned int read_mask;
+
+ /* NOTE(review): Abort is set for any relatively addressed source, but
+ * execution continues below rather than returning - confirm that is
+ * intended. */
+ if (src->RelAddr)
+ d->ReaderData->Abort = 1;
+
+ unsigned int shared_mask = rc_src_reads_dst_mask(src->File, src->Index,
+ src->Swizzle,
+ d->ReaderData->Writer->U.I.DstReg.File,
+ d->ReaderData->Writer->U.I.DstReg.Index,
+ d->AliveWriteMask);
+
+ if (shared_mask == RC_MASK_NONE)
+ return;
+
+ /* If we make it this far, it means that this source reads from the
+ * same register written to by d->ReaderData->Writer. */
+
+ if (d->ReaderData->AbortOnRead) {
+ d->ReaderData->Abort = 1;
+ return;
+ }
+
+ read_mask = rc_swizzle_to_writemask(src->Swizzle);
+ /* Abort if the source reads any channel that is not among the writer's
+ * still-alive channels.
+ * XXX The behavior in this case should be configurable. */
+ if ((read_mask & d->AliveWriteMask) != read_mask) {
+ d->ReaderData->Abort = 1;
+ return;
+ }
+
+ /* The caller's callback runs first and may veto by setting Abort. */
+ d->ReadCB(d->ReaderData, inst, src);
+ if (d->ReaderData->Abort)
+ return;
+
+ add_reader(&d->C->Pool, d->ReaderData, inst, shared_mask, src);
+}
+
+/**
+ * This function is used by rc_get_readers_normal() to determine when
+ * userdata->ReaderData->Writer is dead (i.e. all components of its
+ * destination register have been overwritten by other instructions).
+ *
+ * The caller's WriteCB is always invoked afterwards, whether or not the
+ * write touches the writer's destination register.
+ *
+ * @param userdata A struct get_readers_callback_data.
+ * @param inst The instruction performing the write.
+ * @param file The register file being written.
+ * @param index The register index being written.
+ * @param mask The components being written.
+ */
+static void get_readers_write_callback(
+ void *userdata,
+ struct rc_instruction * inst,
+ rc_register_file file,
+ unsigned int index,
+ unsigned int mask)
+{
+ struct get_readers_callback_data * d = userdata;
+
+ if (index == d->ReaderData->Writer->U.I.DstReg.Index
+ && file == d->ReaderData->Writer->U.I.DstReg.File) {
+ unsigned int shared_mask = mask
+ & d->ReaderData->Writer->U.I.DstReg.WriteMask;
+ if (d->ReaderData->InElse) {
+ if (shared_mask & d->AliveWriteMask) {
+ /* We set AbortOnRead here because the
+ * destination register of d->ReaderData->Writer
+ * is written to in both the IF and the
+ * ELSE block of this IF/ELSE statement.
+ * This means that readers of this
+ * destination register that follow this IF/ELSE
+ * statement use the value of different
+ * instructions depending on the control flow
+ * decisions made by the program. */
+ d->ReaderData->AbortOnRead = 1;
+ }
+ } else {
+ /* Overwritten channels no longer carry the writer's
+ * value. */
+ d->AliveWriteMask &= ~shared_mask;
+ }
+ }
+
+ d->WriteCB(d->ReaderData, inst, file, index, mask);
+}
+
+/**
+ * This function will create a list of readers via the rc_reader_data struct.
+ * This function will abort (set the flag data->Abort) and return if it
+ * encounters an instruction that reads a value written by @param writer and
+ * also a value written by a different instruction. Here are some examples:
+ *
+ * writer = instruction 0;
+ * 0 MOV TEMP[0].xy, TEMP[1].xy
+ * 1 MOV TEMP[0].zw, TEMP[2].xy
+ * 2 MOV TEMP[3], TEMP[0]
+ * The Abort flag will be set on instruction 2, because it reads values written
+ * by instructions 0 and 1.
+ *
+ * writer = instruction 1;
+ * 0 IF TEMP[0].x
+ * 1 MOV TEMP[1], TEMP[2]
+ * 2 ELSE
+ * 3 MOV TEMP[1], TEMP[2]
+ * 4 ENDIF
+ * 5 MOV TEMP[3], TEMP[1]
+ * The Abort flag will be set on instruction 5, because it could read from the
+ * value written by either instruction 1 or 3, depending on the jump decision
+ * made at instruction 0.
+ *
+ * writer = instruction 0;
+ * 0 MOV TEMP[0], TEMP[1]
+ * 1 BGNLOOP
+ * 2 ADD TEMP[0], TEMP[0], none.1
+ * 3 ENDLOOP
+ * The Abort flag will be set as soon as the BGNLOOP at instruction 1 is
+ * reached, because the current implementation gives up on any BGNLOOP or
+ * ENDLOOP. The underlying hazard is that in the first iteration of the loop
+ * instruction 2 reads the value written by instruction 0 and in all other
+ * iterations it reads the value written by instruction 2.
+ *
+ * The scan also stops, without setting Abort, once every component written
+ * by writer has been overwritten or the end of the program is reached.
+ *
+ * @param c The compiler; its memory pool holds the reader list.
+ * @param writer The instruction whose readers are collected.
+ * @param data Output. All fields except CbData are reset by this function.
+ * @param read_cb This function will be called for every instruction that
+ * has been determined to be a reader of writer.
+ * @param write_cb This function will be called for every instruction after
+ * writer.
+ */
+void rc_get_readers_normal(
+ struct radeon_compiler * c,
+ struct rc_instruction * writer,
+ struct rc_reader_data * data,
+ rc_read_src_fn read_cb,
+ rc_read_write_mask_fn write_cb)
+{
+ struct rc_instruction * tmp;
+ struct get_readers_callback_data d;
+ unsigned int branch_depth = 0;
+
+ data->Writer = writer;
+ data->Abort = 0;
+ data->AbortOnRead = 0;
+ data->InElse = 0;
+ data->ReaderCount = 0;
+ data->ReadersReserved = 0;
+ data->Readers = NULL;
+
+ d.C = c;
+ d.AliveWriteMask = writer->U.I.DstReg.WriteMask;
+ d.ReaderData = data;
+ d.ReadCB = read_cb;
+ d.WriteCB = write_cb;
+
+ /* A writer that writes no components cannot have readers. */
+ if (!writer->U.I.DstReg.WriteMask)
+ return;
+
+ for(tmp = writer->Next; tmp != &c->Program.Instructions;
+ tmp = tmp->Next){
+ rc_opcode opcode = get_flow_control_inst(tmp);
+ switch(opcode) {
+ case RC_OPCODE_BGNLOOP:
+ /* XXX We can do better when we see a BGNLOOP if we
+ * add a flag called AbortOnWrite to struct
+ * rc_reader_data and leave it set until the next
+ * ENDLOOP. */
+ case RC_OPCODE_ENDLOOP:
+ /* XXX We can do better when we see an ENDLOOP by
+ * searching backwards from writer and looking for
+ * readers of writer's destination index. If we find a
+ * reader before we get to the BGNLOOP, we must abort
+ * unless there is another writer between that reader
+ * and the BGNLOOP. */
+ data->Abort = 1;
+ return;
+ case RC_OPCODE_IF:
+ branch_depth++;
+ break;
+ case RC_OPCODE_ELSE:
+ /* An ELSE at depth 0 has no IF after writer, so it
+ * pairs with an IF that precedes writer. */
+ if (branch_depth == 0)
+ data->InElse = 1;
+ break;
+ case RC_OPCODE_ENDIF:
+ /* An ENDIF at depth 0 closes a block that was opened
+ * before writer; from here on any read of writer's
+ * destination sets Abort. */
+ if (branch_depth == 0) {
+ data->AbortOnRead = 1;
+ data->InElse = 0;
+ }
+ else {
+ branch_depth--;
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* Sources are not scanned while InElse is set; writes are
+ * always scanned. */
+ if (!data->InElse)
+ rc_for_all_reads_src(tmp, get_readers_normal_read_callback, &d);
+ rc_for_all_writes_mask(tmp, get_readers_write_callback, &d);
+
+ if (data->Abort)
+ return;
+
+ /* Every component written by writer has been overwritten. */
+ if (!d.AliveWriteMask)
+ return;
+ }
+}
/*
* Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
struct rc_swizzle_caps;
struct rc_src_register;
struct rc_pair_instruction_arg;
+struct rc_compiler;
/**
void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata);
/*@}*/
+/** One reader found by rc_get_readers_normal(). */
+struct rc_reader {
+ struct rc_instruction * Inst; /* Instruction that performs the read. */
+ unsigned int WriteMask; /* Components of the writer that this source reads. */
+ struct rc_src_register * Src; /* Source register of Inst that performs the read. */
+};
+
+/**
+ * Result and scan state of rc_get_readers_normal(). Every field except
+ * CbData is reset by that function before scanning.
+ */
+struct rc_reader_data {
+ unsigned int Abort; /* Set when the scan gave up. */
+ unsigned int AbortOnRead; /* When set, the next read of Writer's destination sets Abort. */
+ unsigned int InElse; /* Set while scanning an ELSE block entered at branch depth 0. */
+ struct rc_instruction * Writer; /* Instruction whose readers are collected. */
+
+ unsigned int ReaderCount; /* Number of valid entries in Readers. */
+ unsigned int ReadersReserved; /* Reservation count kept for the memory pool array. */
+ struct rc_reader * Readers; /* Array of the readers found. */
+
+ void * CbData; /* Caller-owned pointer for use by the callbacks. */
+};
+
+/* Collect in data the instructions after inst that read the value inst
+ * writes; data->Abort is set if the scan gives up. read_cb is called for
+ * each reader found and write_cb for each write encountered. */
+void rc_get_readers_normal(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ struct rc_reader_data * data,
+ /*XXX: These should be their own function types. */
+ rc_read_src_fn read_cb,
+ rc_read_write_mask_fn write_cb);
/**
* Compiler passes based on dataflow analysis.
/*
* Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
#include "radeon_dataflow.h"
#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
#include "radeon_swizzle.h"
struct peephole_state {
int BranchDepth;
};
-/**
- * This is a callback function that is meant to be passed to
- * rc_for_all_reads_mask. This function will be called once for each source
- * register in inst.
- * @param inst The instruction that the source register belongs to.
- * @param file The register file of the source register.
- * @param index The index of the source register.
- * @param mask The components of the source register that are being read from.
- */
static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
- rc_register_file file, unsigned int index, unsigned int mask)
+ struct rc_src_register * src)
{
- struct copy_propagate_state * s = data;
+ rc_register_file file = src->File;
+ struct rc_reader_data * reader_data = data;
+ const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
- /* XXX This could probably be handled better. */
- if (file == RC_FILE_ADDRESS) {
- s->Conflict = 1;
+ /* It is possible to do copy propagation in this situation,
+ * just not right now, see peephole_add_presub_inv() */
+ if (reader_data->Writer->U.I.PreSub.Opcode != RC_PRESUB_NONE &&
+ (info->NumSrcRegs > 2 || info->HasTexture)) {
+ reader_data->Abort = 1;
return;
}
- if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index)
+ /* XXX This could probably be handled better. */
+ if (file == RC_FILE_ADDRESS) {
+ reader_data->Abort = 1;
return;
+ }
/* These instructions cannot read from the constants file.
* see radeonTransformTEX()
*/
- if(s->Mov->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
- s->Mov->U.I.SrcReg[0].File != RC_FILE_INPUT &&
+ if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
+ reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
(inst->U.I.Opcode == RC_OPCODE_TEX ||
inst->U.I.Opcode == RC_OPCODE_TXB ||
inst->U.I.Opcode == RC_OPCODE_TXP ||
inst->U.I.Opcode == RC_OPCODE_KIL)){
- s->Conflict = 1;
+ reader_data->Abort = 1;
return;
}
- if ((mask & s->MovMask) == mask) {
- if (s->SourceClobbered) {
- s->Conflict = 1;
- }
- } else if ((mask & s->DefinedMask) == mask) {
- /* read from something entirely written by other instruction: this is okay */
- } else {
- /* read from component combination that is not well-defined without
- * the MOV: cannot remove it */
- s->Conflict = 1;
- }
}
static void copy_propagate_scan_write(void * data, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int mask)
{
- struct copy_propagate_state * s = data;
-
- if (s->BranchDepth < 0)
- return;
+ struct rc_reader_data * reader_data = data;
+ struct copy_propagate_state * s = reader_data->CbData;
- if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) {
- s->MovMask &= ~mask;
- if (s->BranchDepth == 0)
- s->DefinedMask |= mask;
- else
- s->DefinedMask &= ~mask;
- }
- if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) {
+ if (file == reader_data->Writer->U.I.SrcReg[0].File && index == reader_data->Writer->U.I.SrcReg[0].Index) {
if (mask & s->SourcedMask)
- s->SourceClobbered = 1;
+ reader_data->AbortOnRead = 1;
} else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) {
- s->SourceClobbered = 1;
+ reader_data->AbortOnRead = 1;
}
}
static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
{
struct copy_propagate_state s;
+ struct rc_reader_data reader_data;
+ unsigned int i;
if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
inst_mov->U.I.DstReg.RelAddr ||
s.MovMask = inst_mov->U.I.DstReg.WriteMask;
s.DefinedMask = RC_MASK_XYZW & ~s.MovMask;
+ reader_data.CbData = &s;
+
for(unsigned int chan = 0; chan < 4; ++chan) {
unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan);
s.SourcedMask |= (1 << swz) & RC_MASK_XYZW;
}
- /* 1st pass: Check whether all subsequent readers can be changed */
- for(struct rc_instruction * inst = inst_mov->Next;
- inst != &c->Program.Instructions;
- inst = inst->Next) {
- const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
- /* XXX In the future we might be able to make the optimizer
- * smart enough to handle loops. */
- if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP
- || inst->U.I.Opcode == RC_OPCODE_ENDLOOP){
- return;
- }
-
- /* It is possible to do copy propigation in this situation,
- * just not right now, see peephole_add_presub_inv() */
- if (inst_mov->U.I.PreSub.Opcode != RC_PRESUB_NONE &&
- (info->NumSrcRegs > 2 || info->HasTexture)) {
- return;
- }
-
- rc_for_all_reads_mask(inst, copy_propagate_scan_read, &s);
- rc_for_all_writes_mask(inst, copy_propagate_scan_write, &s);
- if (s.Conflict)
- return;
+ /* Get a list of all the readers of this MOV instruction. */
+ rc_get_readers_normal(c, inst_mov, &reader_data,
+ copy_propagate_scan_read, copy_propagate_scan_write);
- if (s.BranchDepth >= 0) {
- if (inst->U.I.Opcode == RC_OPCODE_IF) {
- s.BranchDepth++;
- } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF
- || inst->U.I.Opcode == RC_OPCODE_ELSE) {
- s.BranchDepth--;
- if (s.BranchDepth < 0) {
- s.DefinedMask &= ~s.MovMask;
- s.MovMask = 0;
- }
- }
- }
- }
-
- if (s.Conflict)
+ if (reader_data.Abort || reader_data.ReaderCount == 0)
return;
- /* 2nd pass: We can satisfy all readers, so switch them over all at once */
- s.MovMask = inst_mov->U.I.DstReg.WriteMask;
- s.BranchDepth = 0;
-
- for(struct rc_instruction * inst = inst_mov->Next;
- inst != &c->Program.Instructions;
- inst = inst->Next) {
- const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
- for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
- if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY &&
- inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) {
- unsigned int refmask = 0;
-
- for(unsigned int chan = 0; chan < 4; ++chan) {
- unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
- refmask |= (1 << swz) & RC_MASK_XYZW;
- }
-
- if ((refmask & s.MovMask) == refmask) {
- inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]);
- if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
- inst->U.I.PreSub = s.Mov->U.I.PreSub;
- }
- }
- }
-
- if (opcode->HasDstReg) {
- if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY &&
- inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) {
- s.MovMask &= ~inst->U.I.DstReg.WriteMask;
- }
- }
+ /* Propagate the MOV instruction. */
+ for (i = 0; i < reader_data.ReaderCount; i++) {
+ struct rc_instruction * inst = reader_data.Readers[i].Inst;
+ *reader_data.Readers[i].Src = chain_srcregs(*reader_data.Readers[i].Src, s.Mov->U.I.SrcReg[0]);
- if (s.BranchDepth >= 0) {
- if (inst->U.I.Opcode == RC_OPCODE_IF) {
- s.BranchDepth++;
- } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF
- || inst->U.I.Opcode == RC_OPCODE_ELSE) {
- s.BranchDepth--;
- if (s.BranchDepth < 0)
- break; /* no more readers after this point */
- }
- }
+ if (s.Mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
+ inst->U.I.PreSub = s.Mov->U.I.PreSub;
}
/* Finally, remove the original MOV instruction */
static unsigned int src_reads_dst_mask(struct rc_src_register src,
struct rc_dst_register dst)
{
- unsigned int mask = 0;
- unsigned int i;
if (dst.File != src.File || dst.Index != src.Index) {
return 0;
}
-
- for(i = 0; i < 4; i++) {
- mask |= 1 << GET_SWZ(src.Swizzle, i);
- }
- mask &= RC_MASK_XYZW;
-
- return mask;
+ return rc_swizzle_to_writemask(src.Swizzle);
}
/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)