/*
* Mesa 3-D graphics library
- * Version: 7.5
*
* Copyright (C) 2009 VMware, Inc. All Rights Reserved.
*
static GLboolean dbg = GL_FALSE;
-/* Returns the mask of channels read from the given srcreg in this instruction.
+#define NO_MASK 0xf
+
+/**
+ * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
+ * are read from the given src in this instruction. For component-wise
+ * opcodes an optional mask (dst_mask) further restricts which components
+ * of the dst register are considered; pass NO_MASK for no restriction.
*/
static GLuint
-get_src_arg_mask(const struct prog_instruction *inst, int arg)
+get_src_arg_mask(const struct prog_instruction *inst,
+ GLuint arg, GLuint dst_mask)
{
- int writemask = inst->DstReg.WriteMask;
+ GLuint read_mask, channel_mask;
+ GLuint comp;
- if (inst->CondUpdate)
- writemask = WRITEMASK_XYZW;
+ /* 'arg' must name an actual source operand of this opcode */
+ assert(arg < _mesa_num_inst_src_regs(inst->Opcode));
+ /* From the opcode and the dst register, find which swizzle slots of
+ * the source operand are consumed (channel_mask)
+ */
switch (inst->Opcode) {
case OPCODE_MOV:
+ case OPCODE_MIN:
+ case OPCODE_MAX:
case OPCODE_ABS:
case OPCODE_ADD:
+ case OPCODE_MAD:
case OPCODE_MUL:
case OPCODE_SUB:
- return writemask;
+ case OPCODE_CMP:
+ case OPCODE_FLR:
+ case OPCODE_FRC:
+ case OPCODE_LRP:
+ case OPCODE_SGE:
+ case OPCODE_SLT:
+ case OPCODE_SSG:
+ /* Only slots that are written, and kept by dst_mask, are consumed.
+ * This is the only group for which dst_mask is taken into account.
+ */
+ channel_mask = inst->DstReg.WriteMask & dst_mask;
+ break;
case OPCODE_RCP:
case OPCODE_SIN:
case OPCODE_COS:
case OPCODE_RSQ:
case OPCODE_POW:
case OPCODE_EX2:
- return WRITEMASK_X;
+ case OPCODE_LOG:
+ /* Only the first swizzle slot is consumed, whatever is written */
+ channel_mask = WRITEMASK_X;
+ break;
case OPCODE_DP2:
- return WRITEMASK_XY;
+ channel_mask = WRITEMASK_XY;
+ break;
case OPCODE_DP3:
case OPCODE_XPD:
- return WRITEMASK_XYZ;
+ channel_mask = WRITEMASK_XYZ;
+ break;
default:
- return WRITEMASK_XYZW;
+ /* Unknown to this function: assume all four slots are consumed */
+ channel_mask = WRITEMASK_XYZW;
+ break;
+ }
+
+ /* Now, given the src swizzle and the written channels, find which
+ * components are actually read
+ */
+ read_mask = 0x0;
+ for (comp = 0; comp < 4; ++comp) {
+ const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp);
+ /* A consumed slot contributes the register channel it selects; slots
+ * whose swizzle term is above SWIZZLE_W select no register channel
+ */
+ if (channel_mask & (1 << comp) && coord <= SWIZZLE_W)
+ read_mask |= 1 << coord;
+ }
+
+ return read_mask;
+}
+
+
+/**
+ * For a MOV instruction, compute the part of its write mask that is still
+ * fed by the source channels listed in src_mask.
+ *
+ * \param mov       the MOV instruction (asserted to be OPCODE_MOV)
+ * \param src_mask  bitmask of source register channels to keep
+ * \return bitmask of the destination components that are both written by
+ *         the MOV and whose swizzle selects a channel present in src_mask
+ */
+static GLuint
+get_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask)
+{
+ const GLuint mask = mov->DstReg.WriteMask;
+ GLuint comp;
+ GLuint updated_mask = 0x0;
+
+ assert(mov->Opcode == OPCODE_MOV);
+
+ for (comp = 0; comp < 4; ++comp) {
+ GLuint src_comp;
+ /* Skip destination components the MOV does not write */
+ if ((mask & (1 << comp)) == 0)
+ continue;
+ /* Skip components whose selected source channel is not in src_mask */
+ src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp);
+ if ((src_mask & (1 << src_comp)) == 0)
+ continue;
+ updated_mask |= 1 << comp;
}
+
+ return updated_mask;
+}
+
+
+/**
+ * Ensure that the swizzle is regular. That is, all of the swizzle
+ * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE.
+ *
+ * \param swz  packed swizzle, decoded one term at a time with GET_SWZ
+ * \return true when each of the four terms is <= SWIZZLE_W
+ */
+static GLboolean
+is_swizzle_regular(GLuint swz)
+{
+ return GET_SWZ(swz,0) <= SWIZZLE_W &&
+ GET_SWZ(swz,1) <= SWIZZLE_W &&
+ GET_SWZ(swz,2) <= SWIZZLE_W &&
+ GET_SWZ(swz,3) <= SWIZZLE_W;
}
+
/**
* In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
* \return number of instructions removed
*/
static GLuint
-remove_instructions(struct gl_program *prog, const GLboolean *removeFlags)
+remove_instructions(struct gl_program *prog, const GLboolean *removeFlags,
+ void *mem_ctx)
{
GLint i, removeEnd = 0, removeCount = 0;
GLuint totalRemoved = 0;
/* go backward */
- for (i = prog->NumInstructions - 1; i >= 0; i--) {
+ for (i = prog->arb.NumInstructions - 1; i >= 0; i--) {
if (removeFlags[i]) {
totalRemoved++;
if (removeCount == 0) {
*/
if (removeCount > 0) {
GLint removeStart = removeEnd - removeCount + 1;
- _mesa_delete_instructions(prog, removeStart, removeCount);
+ _mesa_delete_instructions(prog, removeStart, removeCount, mem_ctx);
removeStart = removeCount = 0; /* reset removal info */
}
}
/* Finish removing if the first instruction was to be removed. */
if (removeCount > 0) {
GLint removeStart = removeEnd - removeCount + 1;
- _mesa_delete_instructions(prog, removeStart, removeCount);
+ _mesa_delete_instructions(prog, removeStart, removeCount, mem_ctx);
}
return totalRemoved;
}
{
GLuint i;
- for (i = 0; i < prog->NumInstructions; i++) {
- struct prog_instruction *inst = prog->Instructions + i;
+ for (i = 0; i < prog->arb.NumInstructions; i++) {
+ struct prog_instruction *inst = prog->arb.Instructions + i;
const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
GLuint j;
for (j = 0; j < numSrc; j++) {
if (inst->SrcReg[j].File == file) {
GLuint index = inst->SrcReg[j].Index;
- ASSERT(map[index] >= 0);
+ assert(map[index] >= 0);
inst->SrcReg[j].Index = map[index];
}
}
if (inst->DstReg.File == file) {
const GLuint index = inst->DstReg.Index;
- ASSERT(map[index] >= 0);
+ assert(map[index] >= 0);
inst->DstReg.Index = map[index];
}
}
}
-/**
- * Consolidate temporary registers to use low numbers. For example, if the
- * shader only uses temps 4, 5, 8, replace them with 0, 1, 2.
- */
-static void
-_mesa_consolidate_registers(struct gl_program *prog)
-{
- GLboolean tempUsed[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
- GLint tempMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
- GLuint tempMax = 0, i;
-
- if (dbg) {
- printf("Optimize: Begin register consolidation\n");
- }
-
- memset(tempUsed, 0, sizeof(tempUsed));
-
- for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
- tempMap[i] = -1;
- }
-
- /* set tempUsed[i] if temporary [i] is referenced */
- for (i = 0; i < prog->NumInstructions; i++) {
- const struct prog_instruction *inst = prog->Instructions + i;
- const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
- GLuint j;
- for (j = 0; j < numSrc; j++) {
- if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
- const GLuint index = inst->SrcReg[j].Index;
- ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
- tempUsed[index] = GL_TRUE;
- tempMax = MAX2(tempMax, index);
- break;
- }
- }
- if (inst->DstReg.File == PROGRAM_TEMPORARY) {
- const GLuint index = inst->DstReg.Index;
- ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
- tempUsed[index] = GL_TRUE;
- tempMax = MAX2(tempMax, index);
- }
- }
-
- /* allocate a new index for each temp that's used */
- {
- GLuint freeTemp = 0;
- for (i = 0; i <= tempMax; i++) {
- if (tempUsed[i]) {
- tempMap[i] = freeTemp++;
- /*printf("replace %u with %u\n", i, tempMap[i]);*/
- }
- }
- if (freeTemp == tempMax + 1) {
- /* no consolidation possible */
- return;
- }
- if (dbg) {
- printf("Replace regs 0..%u with 0..%u\n", tempMax, freeTemp-1);
- }
- }
-
- replace_regs(prog, PROGRAM_TEMPORARY, tempMap);
-
- if (dbg) {
- printf("Optimize: End register consolidation\n");
- }
-}
-
-
/**
* Remove dead instructions from the given program.
* This is very primitive for now. Basically look for temp registers
* that are written to but never read. Remove any instructions that
* write to such registers. Be careful with condition code setters.
*/
-static void
-_mesa_remove_dead_code(struct gl_program *prog)
+static GLboolean
+_mesa_remove_dead_code_global(struct gl_program *prog, void *mem_ctx)
{
GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4];
GLboolean *removeInst; /* per-instruction removal flag */
/*_mesa_print_program(prog);*/
}
- removeInst = (GLboolean *)
- calloc(1, prog->NumInstructions * sizeof(GLboolean));
+ removeInst =
+ calloc(prog->arb.NumInstructions, sizeof(GLboolean));
/* Determine which temps are read and written */
- for (i = 0; i < prog->NumInstructions; i++) {
- const struct prog_instruction *inst = prog->Instructions + i;
+ for (i = 0; i < prog->arb.NumInstructions; i++) {
+ const struct prog_instruction *inst = prog->arb.Instructions + i;
const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
GLuint j;
if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
const GLuint index = inst->SrcReg[j].Index;
GLuint read_mask;
- ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
- read_mask = get_src_arg_mask(inst, j);
+ assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
+ read_mask = get_src_arg_mask(inst, j, NO_MASK);
if (inst->SrcReg[j].RelAddr) {
if (dbg)
}
for (comp = 0; comp < 4; comp++) {
- GLuint swz = (inst->SrcReg[j].Swizzle >> (3 * comp)) & 0x7;
-
- if ((read_mask & (1 << comp)) == 0)
- continue;
-
- switch (swz) {
- case SWIZZLE_X:
- tempRead[index][0] = GL_TRUE;
- break;
- case SWIZZLE_Y:
- tempRead[index][1] = GL_TRUE;
- break;
- case SWIZZLE_Z:
- tempRead[index][2] = GL_TRUE;
- break;
- case SWIZZLE_W:
- tempRead[index][3] = GL_TRUE;
- break;
- }
+ const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
+ if (swz <= SWIZZLE_W) {
+ if ((read_mask & (1 << swz)) == 0)
+ continue;
+ tempRead[index][swz] = GL_TRUE;
+ }
}
}
}
/* check dst reg */
if (inst->DstReg.File == PROGRAM_TEMPORARY) {
- const GLuint index = inst->DstReg.Index;
- ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
+ assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
if (inst->DstReg.RelAddr) {
if (dbg)
printf("abort remove dead code (indirect temp)\n");
goto done;
}
-
- if (inst->CondUpdate) {
- /* If we're writing to this register and setting condition
- * codes we cannot remove the instruction. Prevent removal
- * by setting the 'read' flag.
- */
- tempRead[index][0] = GL_TRUE;
- tempRead[index][1] = GL_TRUE;
- tempRead[index][2] = GL_TRUE;
- tempRead[index][3] = GL_TRUE;
- }
}
}
/* find instructions that write to dead registers, flag for removal */
- for (i = 0; i < prog->NumInstructions; i++) {
- struct prog_instruction *inst = prog->Instructions + i;
+ for (i = 0; i < prog->arb.NumInstructions; i++) {
+ struct prog_instruction *inst = prog->arb.Instructions + i;
const GLuint numDst = _mesa_num_inst_dst_regs(inst->Opcode);
if (numDst != 0 && inst->DstReg.File == PROGRAM_TEMPORARY) {
}
/* now remove the instructions which aren't needed */
- rem = remove_instructions(prog, removeInst);
+ rem = remove_instructions(prog, removeInst, mem_ctx);
if (dbg) {
printf("Optimize: End dead code removal.\n");
done:
free(removeInst);
+ return rem != 0;
}
-enum temp_use
+/**
+ * Outcome of scanning forward for the next use of a temporary:
+ * READ  - a later instruction reads the register,
+ * WRITE - the tracked components are all overwritten before being read,
+ * FLOW  - a flow-control instruction was reached, so the scan gave up,
+ * END   - the end of the program was reached.
+ */
+enum inst_use
{
READ,
WRITE,
FLOW,
END
};
+
/**
- * Scan forward in program from 'start' for the next occurance of TEMP[index].
+ * Scan forward in program from 'start' for the next occurrences of TEMP[index].
+ * We check whether an instruction reads any of the components given by 'mask'
+ * and whether those components are overwritten.
* Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
* that we can't look further.
*/
-static enum temp_use
-find_next_temp_use(const struct gl_program *prog, GLuint start, GLuint index)
+static enum inst_use
+find_next_use(const struct gl_program *prog,
+ GLuint start,
+ GLuint index,
+ GLuint mask)
{
GLuint i;
- for (i = start; i < prog->NumInstructions; i++) {
- const struct prog_instruction *inst = prog->Instructions + i;
+ for (i = start; i < prog->arb.NumInstructions; i++) {
+ const struct prog_instruction *inst = prog->arb.Instructions + i;
switch (inst->Opcode) {
case OPCODE_BGNLOOP:
- case OPCODE_ENDLOOP:
case OPCODE_BGNSUB:
+ case OPCODE_CAL:
+ case OPCODE_CONT:
+ case OPCODE_IF:
+ case OPCODE_ELSE:
+ case OPCODE_ENDIF:
+ case OPCODE_ENDLOOP:
case OPCODE_ENDSUB:
+ case OPCODE_RET:
+ /* Any of these opcodes stops the scan: report FLOW */
return FLOW;
+ case OPCODE_END:
+ return END;
default:
{
const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
GLuint j;
for (j = 0; j < numSrc; j++) {
- if (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
- inst->SrcReg[j].Index == index)
+ /* A relatively addressed source of any file is reported as a READ;
+ * otherwise the source must be TEMP[index] and read at least one
+ * of the components still tracked in 'mask'
+ */
+ if (inst->SrcReg[j].RelAddr ||
+ (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
+ inst->SrcReg[j].Index == (GLint)index &&
+ (get_src_arg_mask(inst,j,NO_MASK) & mask)))
return READ;
}
- if (inst->DstReg.File == PROGRAM_TEMPORARY &&
- inst->DstReg.Index == index)
- return WRITE;
+ /* A write to TEMP[index] retires the components it covers. 'mask'
+ * shrinks across instructions, and WRITE is reported only once no
+ * tracked component is left
+ */
+ if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 &&
+ inst->DstReg.File == PROGRAM_TEMPORARY &&
+ inst->DstReg.Index == index) {
+ mask &= ~inst->DstReg.WriteMask;
+ if (mask == 0)
+ return WRITE;
+ }
}
}
}
-
+ /* Ran past the last instruction without a READ, full WRITE or FLOW */
return END;
}
-static GLboolean _mesa_is_flow_control_opcode(enum prog_opcode opcode)
+
+/**
+ * Is the given instruction opcode a flow-control opcode?
+ * XXX maybe move this into prog_instruction.[ch]
+ */
+static GLboolean
+_mesa_is_flow_control_opcode(enum prog_opcode opcode)
{
switch (opcode) {
case OPCODE_BGNLOOP:
case OPCODE_BGNSUB:
- case OPCODE_BRA:
case OPCODE_CAL:
case OPCODE_CONT:
case OPCODE_IF:
}
}
+
+/**
+ * Test if the given instruction is a simple MOV: the opcode is OPCODE_MOV,
+ * neither the source nor the destination uses relative addressing, and the
+ * source has no negation.
+ *
+ * \return true when all of the above hold
+ */
+static GLboolean
+can_downward_mov_be_modifed(const struct prog_instruction *mov)
+{
+ return
+ mov->Opcode == OPCODE_MOV &&
+ mov->SrcReg[0].RelAddr == 0 &&
+ mov->SrcReg[0].Negate == 0 &&
+ mov->DstReg.RelAddr == 0;
+}
+
+
+/**
+ * Stricter variant of can_downward_mov_be_modifed(): in addition to that
+ * test, the MOV must write a PROGRAM_TEMPORARY register and must not
+ * saturate its result.
+ *
+ * \return true when all of the above hold
+ */
+static GLboolean
+can_upward_mov_be_modifed(const struct prog_instruction *mov)
+{
+ return
+ can_downward_mov_be_modifed(mov) &&
+ mov->DstReg.File == PROGRAM_TEMPORARY &&
+ !mov->Saturate;
+}
+
+
/**
* Try to remove use of extraneous MOV instructions, to free them up for dead
* code removal.
* FOO tmpY, arg0, arg1;
*/
- for (i = 0; i + 1 < prog->NumInstructions; i++) {
- const struct prog_instruction *mov = prog->Instructions + i;
+ for (i = 0; i + 1 < prog->arb.NumInstructions; i++) {
+ const struct prog_instruction *mov = prog->arb.Instructions + i;
+ GLuint dst_mask, src_mask;
+ if (can_upward_mov_be_modifed(mov) == GL_FALSE)
+ continue;
- if (mov->Opcode != OPCODE_MOV ||
- mov->DstReg.File != PROGRAM_TEMPORARY ||
- mov->DstReg.RelAddr ||
- mov->DstReg.CondMask != COND_TR ||
- mov->SaturateMode != SATURATE_OFF ||
- mov->SrcReg[0].RelAddr)
- continue;
+ /* Scanning the code, we maintain the components which are still active in
+ * these two masks
+ */
+ dst_mask = mov->DstReg.WriteMask;
+ src_mask = get_src_arg_mask(mov, 0, NO_MASK);
/* Walk through remaining instructions until the or src reg gets
* rewritten or we get into some flow-control, eliminating the use of
* this MOV.
*/
- for (j = i + 1; j < prog->NumInstructions; j++) {
- struct prog_instruction *inst2 = prog->Instructions + j;
- GLuint arg;
+ for (j = i + 1; j < prog->arb.NumInstructions; j++) {
+ struct prog_instruction *inst2 = prog->arb.Instructions + j;
+ GLuint arg;
if (_mesa_is_flow_control_opcode(inst2->Opcode))
break;
/* First rewrite this instruction's args if appropriate. */
for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
- int comp;
- int read_mask = get_src_arg_mask(inst2, arg);
+ GLuint comp, read_mask;
if (inst2->SrcReg[arg].File != mov->DstReg.File ||
inst2->SrcReg[arg].Index != mov->DstReg.Index ||
- inst2->SrcReg[arg].RelAddr ||
- inst2->SrcReg[arg].Abs)
+ inst2->SrcReg[arg].RelAddr)
continue;
+ read_mask = get_src_arg_mask(inst2, arg, NO_MASK);
- /* Check that all the sources for this arg of inst2 come from inst1
- * or constants.
- */
- for (comp = 0; comp < 4; comp++) {
- int src_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
-
- /* If the MOV didn't write that channel, can't use it. */
- if ((read_mask & (1 << comp)) &&
- src_swz <= SWIZZLE_W &&
- (mov->DstReg.WriteMask & (1 << src_swz)) == 0)
- break;
- }
- if (comp != 4)
- continue;
-
- /* Adjust the swizzles of inst2 to point at MOV's source */
- for (comp = 0; comp < 4; comp++) {
- int inst2_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
-
- if (inst2_swz <= SWIZZLE_W) {
- GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
- inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
- inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
- inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
- inst2_swz) & 0x1) << comp);
- }
- }
- inst2->SrcReg[arg].File = mov->SrcReg[0].File;
- inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
+ /* Adjust the swizzles of inst2 to point at MOV's source if ALL the
+ * components read still come from the mov instructions
+ */
+ if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
+ (read_mask & dst_mask) == read_mask) {
+ for (comp = 0; comp < 4; comp++) {
+ const GLuint inst2_swz =
+ GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
+ const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
+ inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
+ inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
+ inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
+ inst2_swz) & 0x1) << comp);
+ }
+ inst2->SrcReg[arg].File = mov->SrcReg[0].File;
+ inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
+ }
}
- /* If this instruction overwrote part of the move, our time is up. */
- if ((inst2->DstReg.File == mov->DstReg.File &&
- (inst2->DstReg.RelAddr ||
- inst2->DstReg.Index == mov->DstReg.Index)) ||
- (inst2->DstReg.File == mov->SrcReg[0].File &&
- (inst2->DstReg.RelAddr ||
- inst2->DstReg.Index == mov->SrcReg[0].Index)))
- break;
+ /* The destination of MOV is written. This potentially deactivates some
+ * components from the src and dst of the MOV instruction
+ */
+ if (inst2->DstReg.File == mov->DstReg.File &&
+ (inst2->DstReg.RelAddr ||
+ inst2->DstReg.Index == mov->DstReg.Index)) {
+ dst_mask &= ~inst2->DstReg.WriteMask;
+ src_mask = get_src_arg_mask(mov, 0, dst_mask);
+ }
+
+ /* inst2 writes the MOV's source register: drop the overwritten source
+ * components and the dst components that were fed by them
+ */
+ if (inst2->DstReg.File == mov->SrcReg[0].File &&
+ (inst2->DstReg.RelAddr ||
+ inst2->DstReg.Index == mov->SrcReg[0].Index)) {
+ src_mask &= ~inst2->DstReg.WriteMask;
+ dst_mask &= get_dst_mask_for_mov(mov, src_mask);
+ }
+ if (dst_mask == 0)
+ break;
}
}
}
}
+
+/**
+ * Complements dead_code_global. Try to remove code in block of code by
+ * carefully monitoring the swizzles. Both functions should be merged into one
+ * with a proper control flow graph
+ *
+ * An instruction writing a temporary is flagged for removal when
+ * find_next_use() reports that the written components are overwritten
+ * (WRITE) or that the program ends (END) before they are read.
+ *
+ * \param prog     program to optimize in place
+ * \param mem_ctx  passed through to remove_instructions()
+ * \return true if at least one instruction was removed
+ */
+static GLboolean
+_mesa_remove_dead_code_local(struct gl_program *prog, void *mem_ctx)
+{
+ GLboolean *removeInst;
+ GLuint i, arg, rem = 0;
+
+ /* NOTE(review): the calloc result is used without a NULL check */
+ removeInst =
+ calloc(prog->arb.NumInstructions, sizeof(GLboolean));
+
+ for (i = 0; i < prog->arb.NumInstructions; i++) {
+ const struct prog_instruction *inst = prog->arb.Instructions + i;
+ const GLuint index = inst->DstReg.Index;
+ const GLuint mask = inst->DstReg.WriteMask;
+ enum inst_use use;
+
+ /* We must deactivate the pass as soon as some indirection is used.
+ * Jumping to 'done' skips remove_instructions(), so nothing is removed
+ * and rem stays 0
+ */
+ if (inst->DstReg.RelAddr)
+ goto done;
+ for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++)
+ if (inst->SrcReg[arg].RelAddr)
+ goto done;
+
+ /* Only non-flow-control instructions that write a temporary are
+ * candidates. The DstReg.RelAddr test here is redundant with the
+ * 'goto done' above
+ */
+ if (_mesa_is_flow_control_opcode(inst->Opcode) ||
+ _mesa_num_inst_dst_regs(inst->Opcode) == 0 ||
+ inst->DstReg.File != PROGRAM_TEMPORARY ||
+ inst->DstReg.RelAddr)
+ continue;
+
+ /* Dead if the written components are never read afterwards */
+ use = find_next_use(prog, i+1, index, mask);
+ if (use == WRITE || use == END)
+ removeInst[i] = GL_TRUE;
+ }
+
+ rem = remove_instructions(prog, removeInst, mem_ctx);
+
+done:
+ free(removeInst);
+ return rem != 0;
+}
+
+
+/**
+ * Try to inject the destination of mov as the destination of inst and recompute
+ * the swizzles operators for the sources of inst if required. Return GL_TRUE
+ * if the substitution was possible, GL_FALSE otherwise
+ *
+ * \param inst  instruction to patch in place
+ * \param mov   MOV instruction whose destination is injected into inst
+ *
+ * NOTE(review): inst->Saturate is OR-ed with mov->Saturate before the opcode
+ * switch, so it is already modified when the default case returns GL_FALSE.
+ */
+static GLboolean
+_mesa_merge_mov_into_inst(struct prog_instruction *inst,
+ const struct prog_instruction *mov)
+{
+ /* Channels of the MOV's source register that the MOV actually reads */
+ const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK);
+
+ /* Some components are not written by inst. We cannot remove the mov */
+ if (mask != (inst->DstReg.WriteMask & mask))
+ return GL_FALSE;
+
+ inst->Saturate |= mov->Saturate;
+
+ /* Depending on the instruction, we may need to recompute the swizzles.
+ * Also, some other instructions (like TEX) are not linear. We will only
+ * consider completely active sources and destinations
+ */
+ switch (inst->Opcode) {
+
+ /* Cartesian (component-wise) instructions: we compute the swizzle */
+ case OPCODE_MOV:
+ case OPCODE_MIN:
+ case OPCODE_MAX:
+ case OPCODE_ABS:
+ case OPCODE_ADD:
+ case OPCODE_MAD:
+ case OPCODE_MUL:
+ case OPCODE_SUB:
+ {
+ /* Indirection table which associates destination and source components
+ * for the mov instruction; entries for unwritten components stay 0
+ */
+ GLuint dst_to_src_comp[4] = {0,0,0,0};
+ GLuint dst_comp, arg;
+ for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
+ if (mov->DstReg.WriteMask & (1 << dst_comp)) {
+ const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp);
+ assert(src_comp < 4);
+ dst_to_src_comp[dst_comp] = src_comp;
+ }
+ }
+
+ /* Patch each source of the instruction */
+ for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) {
+ const GLuint arg_swz = inst->SrcReg[arg].Swizzle;
+ /* The swizzle is rebuilt from zero; slots for components the MOV
+ * does not write are left at 0
+ */
+ inst->SrcReg[arg].Swizzle = 0;
+
+ /* Reset each active component of the swizzle */
+ for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
+ GLuint src_comp, arg_comp;
+ if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0)
+ continue;
+ src_comp = dst_to_src_comp[dst_comp];
+ assert(src_comp < 4);
+ /* Compose the two swizzles: the new slot dst_comp selects what the
+ * old swizzle selected in slot src_comp
+ */
+ arg_comp = GET_SWZ(arg_swz, src_comp);
+ assert(arg_comp < 4);
+ inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp);
+ }
+ }
+ inst->DstReg = mov->DstReg;
+ return GL_TRUE;
+ }
+
+ /* Dot products and scalar instructions: we only change the destination */
+ case OPCODE_RCP:
+ case OPCODE_SIN:
+ case OPCODE_COS:
+ case OPCODE_RSQ:
+ case OPCODE_POW:
+ case OPCODE_EX2:
+ case OPCODE_LOG:
+ case OPCODE_DP2:
+ case OPCODE_DP3:
+ case OPCODE_DP4:
+ inst->DstReg = mov->DstReg;
+ return GL_TRUE;
+
+ /* All other instructions require fully active components with no swizzle */
+ default:
+ if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW ||
+ inst->DstReg.WriteMask != WRITEMASK_XYZW)
+ return GL_FALSE;
+ inst->DstReg = mov->DstReg;
+ return GL_TRUE;
+ }
+}
+
+
/**
* Try to remove extraneous MOV instructions from the given program.
*/
-static void
-_mesa_remove_extra_moves(struct gl_program *prog)
+static GLboolean
+_mesa_remove_extra_moves(struct gl_program *prog, void *mem_ctx)
{
GLboolean *removeInst; /* per-instruction removal flag */
- GLuint i, rem, loopNesting = 0, subroutineNesting = 0;
+ GLuint i, rem = 0, nesting = 0;
if (dbg) {
printf("Optimize: Begin remove extra moves\n");
_mesa_print_program(prog);
}
- removeInst = (GLboolean *)
- calloc(1, prog->NumInstructions * sizeof(GLboolean));
+ removeInst =
+ calloc(prog->arb.NumInstructions, sizeof(GLboolean));
/*
* Look for sequences such as this:
* FOO tmpY, arg0, arg1;
*/
- for (i = 0; i < prog->NumInstructions; i++) {
- const struct prog_instruction *inst = prog->Instructions + i;
+ for (i = 0; i < prog->arb.NumInstructions; i++) {
+ const struct prog_instruction *mov = prog->arb.Instructions + i;
- switch (inst->Opcode) {
+ switch (mov->Opcode) {
case OPCODE_BGNLOOP:
- loopNesting++;
- break;
- case OPCODE_ENDLOOP:
- loopNesting--;
- break;
case OPCODE_BGNSUB:
- subroutineNesting++;
+ case OPCODE_IF:
+ nesting++;
break;
+ case OPCODE_ENDLOOP:
case OPCODE_ENDSUB:
- subroutineNesting--;
+ case OPCODE_ENDIF:
+ nesting--;
break;
case OPCODE_MOV:
if (i > 0 &&
- loopNesting == 0 &&
- subroutineNesting == 0 &&
- inst->SrcReg[0].File == PROGRAM_TEMPORARY &&
- inst->SrcReg[0].Swizzle == SWIZZLE_XYZW) {
+ can_downward_mov_be_modifed(mov) &&
+ mov->SrcReg[0].File == PROGRAM_TEMPORARY &&
+ nesting == 0)
+ {
+
/* see if this MOV can be removed */
- const GLuint tempIndex = inst->SrcReg[0].Index;
+ const GLuint id = mov->SrcReg[0].Index;
struct prog_instruction *prevInst;
GLuint prevI;
prevI = i - 1;
while (prevI > 0 && removeInst[prevI])
prevI--;
- prevInst = prog->Instructions + prevI;
+ prevInst = prog->arb.Instructions + prevI;
if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
- prevInst->DstReg.Index == tempIndex &&
- prevInst->DstReg.WriteMask == WRITEMASK_XYZW) {
+ prevInst->DstReg.Index == id &&
+ prevInst->DstReg.RelAddr == 0) {
- enum temp_use next_use =
- find_next_temp_use(prog, i + 1, tempIndex);
+ const GLuint dst_mask = prevInst->DstReg.WriteMask;
+ enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask);
if (next_use == WRITE || next_use == END) {
/* OK, we can safely remove this MOV instruction.
* Into:
* prevI: FOO z, x, y;
*/
-
- /* patch up prev inst */
- prevInst->DstReg.File = inst->DstReg.File;
- prevInst->DstReg.Index = inst->DstReg.Index;
-
- /* flag this instruction for removal */
- removeInst[i] = GL_TRUE;
-
- if (dbg) {
- printf("Remove MOV at %u\n", i);
- printf("new prev inst %u: ", prevI);
- _mesa_print_instruction(prevInst);
+ if (_mesa_merge_mov_into_inst(prevInst, mov)) {
+ removeInst[i] = GL_TRUE;
+ if (dbg) {
+ printf("Remove MOV at %u\n", i);
+ printf("new prev inst %u: ", prevI);
+ _mesa_print_instruction(prevInst);
+ }
}
}
}
}
/* now remove the instructions which aren't needed */
- rem = remove_instructions(prog, removeInst);
+ rem = remove_instructions(prog, removeInst, mem_ctx);
free(removeInst);
printf("Optimize: End remove extra moves. %u instructions removed\n", rem);
/*_mesa_print_program(prog);*/
}
+
+ return rem != 0;
}
{
GLuint i;
for (i = 0; i + 1 < list->Num; i++) {
- ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End);
+ assert(list->Intervals[i].End <= list->Intervals[i + 1].End);
}
}
#endif
for (k = 0; k < list->Num; k++) {
if (list->Intervals[k].Reg == inv->Reg) {
/* found, remove it */
- ASSERT(list->Intervals[k].Start == inv->Start);
- ASSERT(list->Intervals[k].End == inv->End);
+ assert(list->Intervals[k].Start == inv->Start);
+ assert(list->Intervals[k].End == inv->End);
while (k < list->Num - 1) {
list->Intervals[k] = list->Intervals[k + 1];
k++;
return 0;
}
+
/** sort the interval list according to interval starts */
static void
sort_interval_list_by_start(struct interval_list *list)
{
GLuint i;
for (i = 0; i + 1 < list->Num; i++) {
- ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start);
+ assert(list->Intervals[i].Start <= list->Intervals[i + 1].Start);
}
}
#endif
struct loop_info *loopStack, GLuint loopStackDepth,
GLuint index, GLuint ic)
{
- int i;
+ unsigned i;
+ GLuint begin = ic;
+ GLuint end = ic;
/* If the register is used in a loop, extend its lifetime through the end
* of the outermost loop that doesn't contain its definition.
*/
for (i = 0; i < loopStackDepth; i++) {
if (intBegin[index] < loopStack[i].Start) {
- ic = loopStack[i].End;
+ end = loopStack[i].End;
break;
}
}
- ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
+ /* Variables that are live at the end of a loop will also be live at the
+ * beginning, so an instruction inside of a loop should have its live
+ * interval begin at the start of the outermost loop.
+ */
+ if (loopStackDepth > 0 && ic > loopStack[0].Start && ic < loopStack[0].End) {
+ begin = loopStack[0].Start;
+ }
+
+ assert(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
if (intBegin[index] == -1) {
- ASSERT(intEnd[index] == -1);
- intBegin[index] = intEnd[index] = ic;
+ assert(intEnd[index] == -1);
+ intBegin[index] = begin;
+ intEnd[index] = end;
}
else {
- intEnd[index] = ic;
+ intEnd[index] = end;
}
}
}
/* build intermediate arrays */
- if (!_mesa_find_temp_intervals(prog->Instructions, prog->NumInstructions,
+ if (!_mesa_find_temp_intervals(prog->arb.Instructions,
+ prog->arb.NumInstructions,
intBegin, intEnd))
return GL_FALSE;
else {
/* Interval 'inv' has expired */
const GLint regNew = registerMap[inv->Reg];
- ASSERT(regNew >= 0);
+ assert(regNew >= 0);
if (dbg)
printf(" expire interval for reg %u\n", inv->Reg);
/* return register regNew to the free pool */
if (dbg)
printf(" free reg %d\n", regNew);
- ASSERT(usedRegs[regNew] == GL_TRUE);
+ assert(usedRegs[regNew] == GL_TRUE);
usedRegs[regNew] = GL_FALSE;
}
}
*/
replace_regs(prog, PROGRAM_TEMPORARY, registerMap);
- prog->NumTemporaries = maxTemp + 1;
+ prog->arb.NumTemporaries = maxTemp + 1;
}
if (dbg) {
}
+#if 0
+/* Debug helper (compiled out by the surrounding #if 0): writes 'txt' and the
+ * instruction count to stderr, then prints the program and its parameters.
+ */
+static void
+print_it(struct gl_context *ctx, struct gl_program *program, const char *txt) {
+ fprintf(stderr, "%s (%u inst):\n", txt, program->arb.NumInstructions);
+ _mesa_print_program(program);
+ _mesa_print_program_parameters(ctx, program);
+ fprintf(stderr, "\n\n");
+}
+#endif
+
/**
- * Apply optimizations to the given program to eliminate unnecessary
- * instructions, temp regs, etc.
+ * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
+ * instruction is the first instruction to write to register T0. There are
+ * several lowering passes done in GLSL IR (e.g. branches and
+ * relative addressing) that create a large number of conditional assignments
+ * that ir_to_mesa converts to CMP instructions like the one mentioned above.
+ *
+ * Here is why this conversion is safe:
+ * CMP T0, T1 T2 T0 can be expanded to:
+ * if (T1 < 0.0)
+ * MOV T0, T2;
+ * else
+ * MOV T0, T0;
+ *
+ * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
+ * as the original program. If (T1 < 0.0) evaluates to false, executing
+ * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
+ * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
+ * because any instruction that was going to read from T0 after this was going
+ * to read a garbage value anyway.
*/
-void
-_mesa_optimize_program(GLcontext *ctx, struct gl_program *program)
+static void
+_mesa_simplify_cmp(struct gl_program * program)
{
- _mesa_remove_extra_move_use(program);
+ /* Per-register accumulated write masks seen so far, for temporaries and
+ * for outputs
+ */
+ GLuint tempWrites[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
+ GLuint outputWrites[MAX_PROGRAM_OUTPUTS];
+ GLuint i;
- if (1)
- _mesa_remove_dead_code(program);
+ if (dbg) {
+ printf("Optimize: Begin reads without writes\n");
+ _mesa_print_program(program);
+ }
- if (0) /* not tested much yet */
- _mesa_remove_extra_moves(program);
+ for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
+ tempWrites[i] = 0;
+ }
- if (0)
- _mesa_consolidate_registers(program);
- else
+ for (i = 0; i < MAX_PROGRAM_OUTPUTS; i++) {
+ outputWrites[i] = 0;
+ }
+
+ for (i = 0; i < program->arb.NumInstructions; i++) {
+ struct prog_instruction *inst = program->arb.Instructions + i;
+ GLuint prevWriteMask;
+
+ /* Give up if we encounter relative addressing or flow control.
+ * Instructions already rewritten stay rewritten, and the closing debug
+ * print below is skipped on this path.
+ */
+ if (_mesa_is_flow_control_opcode(inst->Opcode) || inst->DstReg.RelAddr) {
+ return;
+ }
+
+ /* Remember what was written to the destination before this
+ * instruction, then record this instruction's own write
+ */
+ if (inst->DstReg.File == PROGRAM_OUTPUT) {
+ assert(inst->DstReg.Index < MAX_PROGRAM_OUTPUTS);
+ prevWriteMask = outputWrites[inst->DstReg.Index];
+ outputWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
+ } else if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+ assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
+ prevWriteMask = tempWrites[inst->DstReg.Index];
+ tempWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask;
+ } else {
+ /* No other register type can be a destination register. */
+ continue;
+ }
+
+ /* For a CMP to be considered a conditional write, the destination
+ * register and source register two must be the same. In addition,
+ * none of the written components may have been written earlier, and
+ * the channels read from source two must equal the written channels.
+ */
+ if (inst->Opcode == OPCODE_CMP
+ && !(inst->DstReg.WriteMask & prevWriteMask)
+ && inst->SrcReg[2].File == inst->DstReg.File
+ && inst->SrcReg[2].Index == inst->DstReg.Index
+ && inst->DstReg.WriteMask == get_src_arg_mask(inst, 2, NO_MASK)) {
+
+ /* Turn the CMP into a MOV of its second operand (SrcReg[1]) */
+ inst->Opcode = OPCODE_MOV;
+ inst->SrcReg[0] = inst->SrcReg[1];
+
+ /* Unused operands are expected to have the file set to
+ * PROGRAM_UNDEFINED. This is how _mesa_init_instructions initializes
+ * all of the sources.
+ */
+ inst->SrcReg[1].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
+ inst->SrcReg[2].File = PROGRAM_UNDEFINED;
+ inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
+ }
+ }
+ if (dbg) {
+ printf("Optimize: End reads without writes\n");
+ _mesa_print_program(program);
+ }
+}
+
+/**
+ * Apply optimizations to the given program to eliminate unnecessary
+ * instructions, temp regs, etc.
+ *
+ * _mesa_simplify_cmp() runs once; the remaining passes are repeated until
+ * an iteration reports no change.
+ *
+ * \param ctx      not referenced in this function's body
+ * \param program  program to optimize in place
+ * \param mem_ctx  forwarded to the passes that remove instructions
+ */
+void
+_mesa_optimize_program(struct gl_context *ctx, struct gl_program *program,
+ void *mem_ctx)
+{
+ GLboolean any_change;
+
+ _mesa_simplify_cmp(program);
+ /* Stop when no modifications were output */
+ do {
+ any_change = GL_FALSE;
+ /* This pass reports no result here, so it never sets any_change */
+ _mesa_remove_extra_move_use(program);
+ if (_mesa_remove_dead_code_global(program, mem_ctx))
+ any_change = GL_TRUE;
+ if (_mesa_remove_extra_moves(program, mem_ctx))
+ any_change = GL_TRUE;
+ if (_mesa_remove_dead_code_local(program, mem_ctx))
+ any_change = GL_TRUE;
+
+ /* The call is the left operand of ||, so constant folding always runs */
+ any_change = _mesa_constant_fold(program) || any_change;
_mesa_reallocate_registers(program);
+ } while (any_change);
}
+