X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fprogram%2Fprog_optimize.c;h=08c1c3046e72e0271d8f20dcb5bf7326ae1ba2a2;hb=c35f14f36880eb20f5e54480444e343520e9bec5;hp=96971f2eda48fe9f6112c11a4b0c6fda2135684c;hpb=f9995b30756140724f41daf963fa06167912be7f;p=mesa.git diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c index 96971f2eda4..08c1c3046e7 100644 --- a/src/mesa/program/prog_optimize.c +++ b/src/mesa/program/prog_optimize.c @@ -1,6 +1,5 @@ /* * Mesa 3-D graphics library - * Version: 7.5 * * Copyright (C) 2009 VMware, Inc. All Rights Reserved. * @@ -74,6 +73,17 @@ get_src_arg_mask(const struct prog_instruction *inst, case OPCODE_MAD: case OPCODE_MUL: case OPCODE_SUB: + case OPCODE_CMP: + case OPCODE_FLR: + case OPCODE_FRC: + case OPCODE_LRP: + case OPCODE_SEQ: + case OPCODE_SGE: + case OPCODE_SGT: + case OPCODE_SLE: + case OPCODE_SLT: + case OPCODE_SNE: + case OPCODE_SSG: channel_mask = inst->DstReg.WriteMask & dst_mask; break; case OPCODE_RCP: @@ -249,8 +259,8 @@ _mesa_remove_dead_code_global(struct gl_program *prog) /*_mesa_print_program(prog);*/ } - removeInst = (GLboolean *) - calloc(1, prog->NumInstructions * sizeof(GLboolean)); + removeInst = + calloc(prog->NumInstructions, sizeof(GLboolean)); /* Determine which temps are read and written */ for (i = 0; i < prog->NumInstructions; i++) { @@ -381,7 +391,6 @@ find_next_use(const struct gl_program *prog, switch (inst->Opcode) { case OPCODE_BGNLOOP: case OPCODE_BGNSUB: - case OPCODE_BRA: case OPCODE_CAL: case OPCODE_CONT: case OPCODE_IF: @@ -428,7 +437,6 @@ _mesa_is_flow_control_opcode(enum prog_opcode opcode) switch (opcode) { case OPCODE_BGNLOOP: case OPCODE_BGNSUB: - case OPCODE_BRA: case OPCODE_CAL: case OPCODE_CONT: case OPCODE_IF: @@ -461,8 +469,7 @@ can_downward_mov_be_modifed(const struct prog_instruction *mov) mov->SrcReg[0].HasIndex2 == 0 && mov->SrcReg[0].RelAddr2 == 0 && mov->DstReg.RelAddr == 0 && - mov->DstReg.CondMask == COND_TR && - 
mov->SaturateMode == SATURATE_OFF; + mov->DstReg.CondMask == COND_TR; } @@ -471,7 +478,8 @@ can_upward_mov_be_modifed(const struct prog_instruction *mov) { return can_downward_mov_be_modifed(mov) && - mov->DstReg.File == PROGRAM_TEMPORARY; + mov->DstReg.File == PROGRAM_TEMPORARY && + mov->SaturateMode == SATURATE_OFF; } @@ -593,8 +601,8 @@ _mesa_remove_dead_code_local(struct gl_program *prog) GLboolean *removeInst; GLuint i, arg, rem = 0; - removeInst = (GLboolean *) - calloc(1, prog->NumInstructions * sizeof(GLboolean)); + removeInst = + calloc(prog->NumInstructions, sizeof(GLboolean)); for (i = 0; i < prog->NumInstructions; i++) { const struct prog_instruction *inst = prog->Instructions + i; @@ -646,6 +654,8 @@ _mesa_merge_mov_into_inst(struct prog_instruction *inst, if (mask != (inst->DstReg.WriteMask & mask)) return GL_FALSE; + inst->SaturateMode |= mov->SaturateMode; + /* Depending on the instruction, we may need to recompute the swizzles. * Also, some other instructions (like TEX) are not linear. We will only * consider completely active sources and destinations @@ -732,8 +742,8 @@ _mesa_remove_extra_moves(struct gl_program *prog) _mesa_print_program(prog); } - removeInst = (GLboolean *) - calloc(1, prog->NumInstructions * sizeof(GLboolean)); + removeInst = + calloc(prog->NumInstructions, sizeof(GLboolean)); /* * Look for sequences such as this: @@ -778,7 +788,6 @@ _mesa_remove_extra_moves(struct gl_program *prog) if (prevInst->DstReg.File == PROGRAM_TEMPORARY && prevInst->DstReg.Index == id && prevInst->DstReg.RelAddr == 0 && - prevInst->DstReg.CondSrc == 0 && prevInst->DstReg.CondMask == COND_TR) { const GLuint dst_mask = prevInst->DstReg.WriteMask; @@ -937,24 +946,35 @@ update_interval(GLint intBegin[], GLint intEnd[], GLuint index, GLuint ic) { int i; + GLuint begin = ic; + GLuint end = ic; /* If the register is used in a loop, extend its lifetime through the end * of the outermost loop that doesn't contain its definition. 
*/ for (i = 0; i < loopStackDepth; i++) { if (intBegin[index] < loopStack[i].Start) { - ic = loopStack[i].End; + end = loopStack[i].End; break; } } + /* Variables that are live at the end of a loop will also be live at the + * beginning, so an instruction inside of a loop should have its live + * interval begin at the start of the outermost loop. + */ + if (loopStackDepth > 0 && ic > loopStack[0].Start && ic < loopStack[0].End) { + begin = loopStack[0].Start; + } + ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); if (intBegin[index] == -1) { ASSERT(intEnd[index] == -1); - intBegin[index] = intEnd[index] = ic; + intBegin[index] = begin; + intEnd[index] = end; } else { - intEnd[index] = ic; + intEnd[index] = end; } } @@ -1224,6 +1244,95 @@ print_it(struct gl_context *ctx, struct gl_program *program, const char *txt) { } #endif +/** + * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP + * instruction is the first instruction to write to register T0. There are + * several lowering passes done in GLSL IR (e.g. branches and + * relative addressing) that create a large number of conditional assignments + * that ir_to_mesa converts to CMP instructions like the one mentioned above. + * + * Here is why this conversion is safe: + * CMP T0, T1 T2 T0 can be expanded to: + * if (T1 < 0.0) + * MOV T0, T2; + * else + * MOV T0, T0; + * + * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same + * as the original program. If (T1 < 0.0) evaluates to false, executing + * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. + * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 + * because any instruction that was going to read from T0 after this was going + * to read a garbage value anyway. 
+ */ +static void +_mesa_simplify_cmp(struct gl_program * program) +{ + GLuint tempWrites[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; + GLuint outputWrites[MAX_PROGRAM_OUTPUTS]; + GLuint i; + + if (dbg) { + printf("Optimize: Begin reads without writes\n"); + _mesa_print_program(program); + } + + for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) { + tempWrites[i] = 0; + } + + for (i = 0; i < MAX_PROGRAM_OUTPUTS; i++) { + outputWrites[i] = 0; + } + + for (i = 0; i < program->NumInstructions; i++) { + struct prog_instruction *inst = program->Instructions + i; + GLuint prevWriteMask; + + /* Give up if we encounter relative addressing or flow control. */ + if (_mesa_is_flow_control_opcode(inst->Opcode) || inst->DstReg.RelAddr) { + return; + } + + if (inst->DstReg.File == PROGRAM_OUTPUT) { + assert(inst->DstReg.Index < MAX_PROGRAM_OUTPUTS); + prevWriteMask = outputWrites[inst->DstReg.Index]; + outputWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask; + } else if (inst->DstReg.File == PROGRAM_TEMPORARY) { + assert(inst->DstReg.Index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); + prevWriteMask = tempWrites[inst->DstReg.Index]; + tempWrites[inst->DstReg.Index] |= inst->DstReg.WriteMask; + } else { + /* No other register type can be a destination register. */ + continue; + } + + /* For a CMP to be considered a conditional write, the destination + * register and source register two must be the same. */ + if (inst->Opcode == OPCODE_CMP + && !(inst->DstReg.WriteMask & prevWriteMask) + && inst->SrcReg[2].File == inst->DstReg.File + && inst->SrcReg[2].Index == inst->DstReg.Index + && inst->DstReg.WriteMask == get_src_arg_mask(inst, 2, NO_MASK)) { + + inst->Opcode = OPCODE_MOV; + inst->SrcReg[0] = inst->SrcReg[1]; + + /* Unused operands are expected to have the file set to + * PROGRAM_UNDEFINED. This is how _mesa_init_instructions initializes + * all of the sources. 
+ */ + inst->SrcReg[1].File = PROGRAM_UNDEFINED; + inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; + inst->SrcReg[2].File = PROGRAM_UNDEFINED; + inst->SrcReg[2].Swizzle = SWIZZLE_NOOP; + } + } + if (dbg) { + printf("Optimize: End reads without writes\n"); + _mesa_print_program(program); + } +} /** * Apply optimizations to the given program to eliminate unnecessary @@ -1234,6 +1343,7 @@ _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program) { GLboolean any_change; + _mesa_simplify_cmp(program); /* Stop when no modifications were output */ do { any_change = GL_FALSE; @@ -1244,6 +1354,8 @@ _mesa_optimize_program(struct gl_context *ctx, struct gl_program *program) any_change = GL_TRUE; if (_mesa_remove_dead_code_local(program)) any_change = GL_TRUE; + + any_change = _mesa_constant_fold(program) || any_change; _mesa_reallocate_registers(program); } while (any_change); }