/*
* Mesa 3-D graphics library
- * Version: 7.5
*
* Copyright (C) 2009 VMware, Inc. All Rights Reserved.
*
#define MAX_LOOP_NESTING 50
-
+/* MAX_PROGRAM_TEMPS is a low number (256), and we want to be able to
+ * register allocate many temporary values into that small number of
+ * temps. So allow large temporary indices coming into the register
+ * allocator.
+ */
+#define REG_ALLOCATE_MAX_PROGRAM_TEMPS ((1 << INST_INDEX_BITS) - 1)
static GLboolean dbg = GL_FALSE;
#define NO_MASK 0xf
-/** Returns the mask of channels read from the given src in this instruction, We
- * also provide one optional masks which may mask other components in the dst
- * register
+/**
+ * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
+ * are read from the given src in this instruction, We also provide
+ * one optional masks which may mask other components in the dst
+ * register
*/
static GLuint
-get_src_arg_mask(const struct prog_instruction *inst, int arg, int dst_mask)
+get_src_arg_mask(const struct prog_instruction *inst,
+ GLuint arg, GLuint dst_mask)
{
- int read_mask, channel_mask;
- int comp;
- ASSERT(arg < _mesa_num_inst_src_regs(inst->Opcode));
+ GLuint read_mask, channel_mask;
+ GLuint comp;
+
+ assert(arg < _mesa_num_inst_src_regs(inst->Opcode));
/* Form the dst register, find the written channels */
if (inst->CondUpdate) {
case OPCODE_MAD:
case OPCODE_MUL:
case OPCODE_SUB:
+ case OPCODE_CMP:
+ case OPCODE_FLR:
+ case OPCODE_FRC:
+ case OPCODE_LRP:
+ case OPCODE_SEQ:
+ case OPCODE_SGE:
+ case OPCODE_SGT:
+ case OPCODE_SLE:
+ case OPCODE_SLT:
+ case OPCODE_SNE:
+ case OPCODE_SSG:
channel_mask = inst->DstReg.WriteMask & dst_mask;
break;
case OPCODE_RCP:
/* Now, given the src swizzle and the written channels, find which
* components are actually read
*/
- read_mask = 0;
+ read_mask = 0x0;
for (comp = 0; comp < 4; ++comp) {
- const int coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp);
- ASSERT(coord < 4);
+ const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp);
if (channel_mask & (1 << comp) && coord <= SWIZZLE_W)
read_mask |= 1 << coord;
}
return read_mask;
}
-/** For a MOV instruction, compute a write mask when src register also has
- * a mask
+
+/**
+ * For a MOV instruction, compute a write mask when src register also has
+ * a mask
*/
static GLuint
-get_dst_mask_for_mov(const struct prog_instruction *mov, int src_mask)
+get_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask)
{
- const int mask = mov->DstReg.WriteMask;
- int comp;
- int updated_mask = 0;
- ASSERT(mov->Opcode == OPCODE_MOV);
+ const GLuint mask = mov->DstReg.WriteMask;
+ GLuint comp;
+ GLuint updated_mask = 0x0;
+
+ assert(mov->Opcode == OPCODE_MOV);
for (comp = 0; comp < 4; ++comp) {
- int src_comp;
+ GLuint src_comp;
if ((mask & (1 << comp)) == 0)
continue;
src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp);
return updated_mask;
}
-/** Ensure that the swizzle is regular */
-static int
-is_swizzle_regular(int swz)
+
+/**
+ * Ensure that the swizzle is regular. That is, all of the swizzle
+ * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE.
+ */
+static GLboolean
+is_swizzle_regular(GLuint swz)
{
return GET_SWZ(swz,0) <= SWIZZLE_W &&
GET_SWZ(swz,1) <= SWIZZLE_W &&
GET_SWZ(swz,3) <= SWIZZLE_W;
}
+
/**
* In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
* \return number of instructions removed
for (j = 0; j < numSrc; j++) {
if (inst->SrcReg[j].File == file) {
GLuint index = inst->SrcReg[j].Index;
- ASSERT(map[index] >= 0);
+ assert(map[index] >= 0);
inst->SrcReg[j].Index = map[index];
}
}
if (inst->DstReg.File == file) {
const GLuint index = inst->DstReg.Index;
- ASSERT(map[index] >= 0);
+ assert(map[index] >= 0);
inst->DstReg.Index = map[index];
}
}
}
+
/**
* Remove dead instructions from the given program.
* This is very primitive for now. Basically look for temp registers
static GLboolean
_mesa_remove_dead_code_global(struct gl_program *prog)
{
- GLboolean tempRead[MAX_PROGRAM_TEMPS][4];
+ GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4];
GLboolean *removeInst; /* per-instruction removal flag */
GLuint i, rem = 0, comp;
/*_mesa_print_program(prog);*/
}
- removeInst = (GLboolean *)
- calloc(1, prog->NumInstructions * sizeof(GLboolean));
+ removeInst =
+ calloc(prog->NumInstructions, sizeof(GLboolean));
/* Determine which temps are read and written */
for (i = 0; i < prog->NumInstructions; i++) {
if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
const GLuint index = inst->SrcReg[j].Index;
GLuint read_mask;
- ASSERT(index < MAX_PROGRAM_TEMPS);
+ assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
read_mask = get_src_arg_mask(inst, j, NO_MASK);
if (inst->SrcReg[j].RelAddr) {
for (comp = 0; comp < 4; comp++) {
const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
- ASSERT(swz < 4);
- if ((read_mask & (1 << swz)) == 0)
- continue;
- if (swz <= SWIZZLE_W)
+ if (swz <= SWIZZLE_W) {
+ if ((read_mask & (1 << swz)) == 0)
+ continue;
tempRead[index][swz] = GL_TRUE;
+ }
}
}
}
/* check dst reg */
if (inst->DstReg.File == PROGRAM_TEMPORARY) {
const GLuint index = inst->DstReg.Index;
- ASSERT(index < MAX_PROGRAM_TEMPS);
+ assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
if (inst->DstReg.RelAddr) {
if (dbg)
END
};
+
/**
* Scan forward in program from 'start' for the next occurances of TEMP[index].
* We look if an instruction reads the component given by the masks and if they
switch (inst->Opcode) {
case OPCODE_BGNLOOP:
case OPCODE_BGNSUB:
- case OPCODE_BRA:
case OPCODE_CAL:
case OPCODE_CONT:
case OPCODE_IF:
for (j = 0; j < numSrc; j++) {
if (inst->SrcReg[j].RelAddr ||
(inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
- inst->SrcReg[j].Index == index &&
+ inst->SrcReg[j].Index == (GLint)index &&
(get_src_arg_mask(inst,j,NO_MASK) & mask)))
return READ;
}
return END;
}
-static GLboolean _mesa_is_flow_control_opcode(enum prog_opcode opcode)
+
+/**
+ * Is the given instruction opcode a flow-control opcode?
+ * XXX maybe move this into prog_instruction.[ch]
+ */
+static GLboolean
+_mesa_is_flow_control_opcode(enum prog_opcode opcode)
{
switch (opcode) {
case OPCODE_BGNLOOP:
case OPCODE_BGNSUB:
- case OPCODE_BRA:
case OPCODE_CAL:
case OPCODE_CONT:
case OPCODE_IF:
}
}
+
+/**
+ * Test if the given instruction is a simple MOV (no conditional updating,
+ * not relative addressing, no negation/abs, etc).
+ */
static GLboolean
can_downward_mov_be_modifed(const struct prog_instruction *mov)
{
mov->SrcReg[0].HasIndex2 == 0 &&
mov->SrcReg[0].RelAddr2 == 0 &&
mov->DstReg.RelAddr == 0 &&
- mov->DstReg.CondMask == COND_TR &&
- mov->SaturateMode == SATURATE_OFF;
+ mov->DstReg.CondMask == COND_TR;
}
+
static GLboolean
can_upward_mov_be_modifed(const struct prog_instruction *mov)
{
return
can_downward_mov_be_modifed(mov) &&
- mov->DstReg.File == PROGRAM_TEMPORARY;
+ mov->DstReg.File == PROGRAM_TEMPORARY &&
+ !mov->Saturate;
}
+
/**
* Try to remove use of extraneous MOV instructions, to free them up for dead
* code removal.
for (i = 0; i + 1 < prog->NumInstructions; i++) {
const struct prog_instruction *mov = prog->Instructions + i;
- int dst_mask, src_mask;
+ GLuint dst_mask, src_mask;
if (can_upward_mov_be_modifed(mov) == GL_FALSE)
continue;
/* First rewrite this instruction's args if appropriate. */
for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
- int comp, read_mask;
+ GLuint comp, read_mask;
if (inst2->SrcReg[arg].File != mov->DstReg.File ||
inst2->SrcReg[arg].Index != mov->DstReg.Index ||
/* Adjust the swizzles of inst2 to point at MOV's source if ALL the
* components read still come from the mov instructions
*/
- if(is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
+ if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
(read_mask & dst_mask) == read_mask) {
for (comp = 0; comp < 4; comp++) {
- int inst2_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
-
- GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
+ const GLuint inst2_swz =
+ GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
+ const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
}
}
+
/**
* Complements dead_code_global. Try to remove code in block of code by
* carefully monitoring the swizzles. Both functions should be merged into one
GLboolean *removeInst;
GLuint i, arg, rem = 0;
- removeInst = (GLboolean *)
- calloc(1, prog->NumInstructions * sizeof(GLboolean));
+ removeInst =
+ calloc(prog->NumInstructions, sizeof(GLboolean));
for (i = 0; i < prog->NumInstructions; i++) {
const struct prog_instruction *inst = prog->Instructions + i;
return rem != 0;
}
+
/**
* Try to inject the destination of mov as the destination of inst and recompute
* the swizzles operators for the sources of inst if required. Return GL_TRUE
if (mask != (inst->DstReg.WriteMask & mask))
return GL_FALSE;
+ inst->Saturate |= mov->Saturate;
+
/* Depending on the instruction, we may need to recompute the swizzles.
* Also, some other instructions (like TEX) are not linear. We will only
* consider completely active sources and destinations
case OPCODE_MUL:
case OPCODE_SUB:
{
- int dst_to_src_comp[4] = {0,0,0,0};
- int dst_comp, arg;
+ GLuint dst_to_src_comp[4] = {0,0,0,0};
+ GLuint dst_comp, arg;
for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
if (mov->DstReg.WriteMask & (1 << dst_comp)) {
- const int src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp);
- ASSERT(src_comp < 4);
+ const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp);
+ assert(src_comp < 4);
dst_to_src_comp[dst_comp] = src_comp;
}
}
/* Patch each source of the instruction */
for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) {
- const int arg_swz = inst->SrcReg[arg].Swizzle;
+ const GLuint arg_swz = inst->SrcReg[arg].Swizzle;
inst->SrcReg[arg].Swizzle = 0;
/* Reset each active component of the swizzle */
for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
- int src_comp, arg_comp;
+ GLuint src_comp, arg_comp;
if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0)
continue;
src_comp = dst_to_src_comp[dst_comp];
- ASSERT(src_comp < 4);
+ assert(src_comp < 4);
arg_comp = GET_SWZ(arg_swz, src_comp);
- ASSERT(arg_comp < 4);
+ assert(arg_comp < 4);
inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp);
}
}
}
}
+
/**
* Try to remove extraneous MOV instructions from the given program.
*/
_mesa_print_program(prog);
}
- removeInst = (GLboolean *)
- calloc(1, prog->NumInstructions * sizeof(GLboolean));
+ removeInst =
+ calloc(prog->NumInstructions, sizeof(GLboolean));
/*
* Look for sequences such as this:
nesting--;
break;
case OPCODE_MOV:
- if (i > 0 && can_downward_mov_be_modifed(mov) && nesting == 0) {
+ if (i > 0 &&
+ can_downward_mov_be_modifed(mov) &&
+ mov->SrcReg[0].File == PROGRAM_TEMPORARY &&
+ nesting == 0)
+ {
/* see if this MOV can be removed */
const GLuint id = mov->SrcReg[0].Index;
if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
prevInst->DstReg.Index == id &&
prevInst->DstReg.RelAddr == 0 &&
- prevInst->DstReg.CondSrc == 0 &&
prevInst->DstReg.CondMask == COND_TR) {
const GLuint dst_mask = prevInst->DstReg.WriteMask;
struct interval_list
{
GLuint Num;
- struct interval Intervals[MAX_PROGRAM_TEMPS];
+ struct interval Intervals[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
};
{
GLuint i;
for (i = 0; i + 1 < list->Num; i++) {
- ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End);
+ assert(list->Intervals[i].End <= list->Intervals[i + 1].End);
}
}
#endif
for (k = 0; k < list->Num; k++) {
if (list->Intervals[k].Reg == inv->Reg) {
/* found, remove it */
- ASSERT(list->Intervals[k].Start == inv->Start);
- ASSERT(list->Intervals[k].End == inv->End);
+ assert(list->Intervals[k].Start == inv->Start);
+ assert(list->Intervals[k].End == inv->End);
while (k < list->Num - 1) {
list->Intervals[k] = list->Intervals[k + 1];
k++;
return 0;
}
+
/** sort the interval list according to interval starts */
static void
sort_interval_list_by_start(struct interval_list *list)
{
GLuint i;
for (i = 0; i + 1 < list->Num; i++) {
- ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start);
+ assert(list->Intervals[i].Start <= list->Intervals[i + 1].Start);
}
}
#endif
}
+struct loop_info
+{
+ GLuint Start, End; /**< Start, end instructions of loop */
+};
/**
* Update the intermediate interval info for register 'index' and
* instruction 'ic'.
*/
static void
-update_interval(GLint intBegin[], GLint intEnd[], GLuint index, GLuint ic)
+update_interval(GLint intBegin[], GLint intEnd[],
+ struct loop_info *loopStack, GLuint loopStackDepth,
+ GLuint index, GLuint ic)
{
- ASSERT(index < MAX_PROGRAM_TEMPS);
+ unsigned i;
+ GLuint begin = ic;
+ GLuint end = ic;
+
+ /* If the register is used in a loop, extend its lifetime through the end
+ * of the outermost loop that doesn't contain its definition.
+ */
+ for (i = 0; i < loopStackDepth; i++) {
+ if (intBegin[index] < loopStack[i].Start) {
+ end = loopStack[i].End;
+ break;
+ }
+ }
+
+ /* Variables that are live at the end of a loop will also be live at the
+ * beginning, so an instruction inside of a loop should have its live
+ * interval begin at the start of the outermost loop.
+ */
+ if (loopStackDepth > 0 && ic > loopStack[0].Start && ic < loopStack[0].End) {
+ begin = loopStack[0].Start;
+ }
+
+ assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
if (intBegin[index] == -1) {
- ASSERT(intEnd[index] == -1);
- intBegin[index] = intEnd[index] = ic;
+ assert(intEnd[index] == -1);
+ intBegin[index] = begin;
+ intEnd[index] = end;
}
else {
- intEnd[index] = ic;
+ intEnd[index] = end;
}
}
GLboolean
_mesa_find_temp_intervals(const struct prog_instruction *instructions,
GLuint numInstructions,
- GLint intBegin[MAX_PROGRAM_TEMPS],
- GLint intEnd[MAX_PROGRAM_TEMPS])
+ GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS],
+ GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
{
- struct loop_info
- {
- GLuint Start, End; /**< Start, end instructions of loop */
- };
struct loop_info loopStack[MAX_LOOP_NESTING];
GLuint loopStackDepth = 0;
GLuint i;
- for (i = 0; i < MAX_PROGRAM_TEMPS; i++){
+ for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
intBegin[i] = intEnd[i] = -1;
}
const GLuint index = inst->SrcReg[j].Index;
if (inst->SrcReg[j].RelAddr)
return GL_FALSE;
- update_interval(intBegin, intEnd, index, i);
- if (loopStackDepth > 0) {
- /* extend temp register's interval to end of loop */
- GLuint loopEnd = loopStack[loopStackDepth - 1].End;
- update_interval(intBegin, intEnd, index, loopEnd);
- }
+ update_interval(intBegin, intEnd, loopStack, loopStackDepth,
+ index, i);
}
}
if (inst->DstReg.File == PROGRAM_TEMPORARY) {
const GLuint index = inst->DstReg.Index;
if (inst->DstReg.RelAddr)
return GL_FALSE;
- update_interval(intBegin, intEnd, index, i);
- if (loopStackDepth > 0) {
- /* extend temp register's interval to end of loop */
- GLuint loopEnd = loopStack[loopStackDepth - 1].End;
- update_interval(intBegin, intEnd, index, loopEnd);
- }
+ update_interval(intBegin, intEnd, loopStack, loopStackDepth,
+ index, i);
}
}
}
find_live_intervals(struct gl_program *prog,
struct interval_list *liveIntervals)
{
- GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS];
+ GLint intBegin[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
+ GLint intEnd[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
GLuint i;
/*
/* Build live intervals list from intermediate arrays */
liveIntervals->Num = 0;
- for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
+ for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
if (intBegin[i] >= 0) {
struct interval inv;
inv.Reg = i;
/** Scan the array of used register flags to find free entry */
static GLint
-alloc_register(GLboolean usedRegs[MAX_PROGRAM_TEMPS])
+alloc_register(GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS])
{
GLuint k;
- for (k = 0; k < MAX_PROGRAM_TEMPS; k++) {
+ for (k = 0; k < REG_ALLOCATE_MAX_PROGRAM_TEMPS; k++) {
if (!usedRegs[k]) {
usedRegs[k] = GL_TRUE;
return k;
_mesa_reallocate_registers(struct gl_program *prog)
{
struct interval_list liveIntervals;
- GLint registerMap[MAX_PROGRAM_TEMPS];
- GLboolean usedRegs[MAX_PROGRAM_TEMPS];
+ GLint registerMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
+ GLboolean usedRegs[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
GLuint i;
GLint maxTemp = -1;
_mesa_print_program(prog);
}
- for (i = 0; i < MAX_PROGRAM_TEMPS; i++){
+ for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++){
registerMap[i] = -1;
usedRegs[i] = GL_FALSE;
}
else {
/* Interval 'inv' has expired */
const GLint regNew = registerMap[inv->Reg];
- ASSERT(regNew >= 0);
+ assert(regNew >= 0);
if (dbg)
printf(" expire interval for reg %u\n", inv->Reg);
/* return register regNew to the free pool */
if (dbg)
printf(" free reg %d\n", regNew);
- ASSERT(usedRegs[regNew] == GL_TRUE);
+ assert(usedRegs[regNew] == GL_TRUE);
usedRegs[regNew] = GL_FALSE;
}
}
}
}
+
#if 0
static void
-print_it(GLcontext *ctx, struct gl_program *program, const char *txt) {
+print_it(struct gl_context *ctx, struct gl_program *program, const char *txt) {
fprintf(stderr, "%s (%u inst):\n", txt, program->NumInstructions);
_mesa_print_program(program);
_mesa_print_program_parameters(ctx, program);
}
#endif
+/**
+ * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
+ * instruction is the first instruction to write to register T0. The are
+ * several lowering passes done in GLSL IR (e.g. branches and
+ * relative addressing) that create a large number of conditional assignments
+ * that ir_to_mesa converts to CMP instructions like the one mentioned above.
+ *
+ * Here is why this conversion is safe:
+ * CMP T0, T1 T2 T0 can be expanded to:
+ * if (T1 < 0.0)
+ * MOV T0, T2;
+ * else
+ * MOV T0, T0;
+ *
+ * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
+ * as the original program. If (T1 < 0.0) evaluates to false, executing
+ * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
+ * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
+ * because any instruction that was going to read from T0 after this was going
+ * to read a garbage value anyway.
+ */
+static void
+_mesa_simplify_cmp(struct gl_program * program)
+{
+ GLuint tempWrites[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
+ GLuint outputWrites[MAX_PROGRAM_OUTPUTS];
+ GLuint i;
+
+ if (dbg) {
+ printf("Optimize: Begin reads without writes\n");
+ _mesa_print_program(program);
+ }
+
+ for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
+ tempWrites[i] = 0;
+ }
+
+ for (i = 0; i < MAX_PROGRAM_OUTPUTS; i++) {
+ outputWrites[i] = 0;
+ }
+
+ for (i = 0; i < program->NumInstructions; i++) {
+ struct prog_instruction *inst = program->Instructions + i;
+ GLuint prevWriteMask;
+
+ /* Give up if we encounter relative addressing or flow control. */
+ if (_mesa_is_flow_control_opcode(inst->Opcode) || inst->DstReg.RelAddr) {
+ return;
+ }
+
+ if (inst->DstReg.File == PROGRAM_OUTPUT) {
+ assert(inst->DstReg.Index < MAX_PROGRAM_OUTPUTS);
+ prevWriteMask = outputWrites[inst->DstReg.Index];
+ outputWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
+ } else if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
+ prevWriteMask = tempWrites[inst->DstReg.Index];
+ tempWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
+ } else {
+ /* No other register type can be a destination register. */
+ continue;
+ }
+
+ /* For a CMP to be considered a conditional write, the destination
+ * register and source register two must be the same. */
+ if (inst->Opcode == OPCODE_CMP
+ && !(inst->DstReg.WriteMask & prevWriteMask)
+ && inst->SrcReg[2].File == inst->DstReg.File
+ && inst->SrcReg[2].Index == inst->DstReg.Index
+ && inst->DstReg.WriteMask == get_src_arg_mask(inst, 2, NO_MASK)) {
+
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = inst->SrcReg[1];
+
+ /* Unused operands are expected to have the file set to
+ * PROGRAM_UNDEFINED. This is how _mesa_init_instructions initializes
+ * all of the sources.
+ */
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+ inst->SrcReg[2].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
+ }
+ }
+ if (dbg) {
+ printf("Optimize: End reads without writes\n");
+ _mesa_print_program(program);
+ }
+}
+
/**
* Apply optimizations to the given program to eliminate unnecessary
* instructions, temp regs, etc.
*/
void
-_mesa_optimize_program(GLcontext *ctx, struct gl_program *program)
+_mesa_optimize_program(struct gl_context *ctx, struct gl_program *program)
{
GLboolean any_change;
+ _mesa_simplify_cmp(program);
/* Stop when no modifications were output */
do {
any_change = GL_FALSE;
any_change = GL_TRUE;
if (_mesa_remove_dead_code_local(program))
any_change = GL_TRUE;
+
+ any_change = _mesa_constant_fold(program) || any_change;
_mesa_reallocate_registers(program);
} while (any_change);
}