From 77fdfaa23adeaaf6a217ef1ee751410c6a5b0d21 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sat, 5 Jul 2008 20:01:20 +0200 Subject: [PATCH] r300: Correctly scan for used temporary registers This fixes a regression introduced by dea8719f0... --- src/mesa/drivers/dri/r300/r300_fragprog.c | 28 +++---- src/mesa/drivers/dri/r300/r500_fragprog.c | 18 ++-- src/mesa/drivers/dri/r300/radeon_program.c | 52 ++++++++++-- src/mesa/drivers/dri/r300/radeon_program.h | 20 ++++- .../drivers/dri/r300/radeon_program_alu.c | 84 +++++++++---------- .../drivers/dri/r300/radeon_program_alu.h | 3 +- 6 files changed, 126 insertions(+), 79 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 8c49e8ada68..6a8ef0ef5fc 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -68,7 +68,7 @@ static void reset_srcreg(struct prog_src_register* reg) * be reused. */ static GLboolean transform_TEX( - GLcontext *ctx, struct gl_program *p, + struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data) { struct r300_fragment_program_compiler *compiler = @@ -84,11 +84,11 @@ static GLboolean transform_TEX( return GL_FALSE; if (inst.Opcode != OPCODE_KIL && - p->ShadowSamplers & (1 << inst.TexSrcUnit)) { + t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { - tgt = radeonAppendInstructions(p, 1); + tgt = radeonAppendInstructions(t->Program, 1); tgt->Opcode = OPCODE_MOV; tgt->DstReg = inst.DstReg; @@ -98,7 +98,7 @@ static GLboolean transform_TEX( } inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = _mesa_find_free_register(p, PROGRAM_TEMPORARY); + inst.DstReg.Index = radeonFindFreeTemporary(t); inst.DstReg.WriteMask = WRITEMASK_XYZW; } @@ -113,7 +113,7 @@ static GLboolean transform_TEX( 0 }; - int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY); + int tempreg = radeonFindFreeTemporary(t); int factor_index; tokens[2] = inst.TexSrcUnit; @@ -121,7 +121,7 @@ static GLboolean transform_TEX( _mesa_add_state_reference( compiler->fp->mesa_program.Base.Parameters, tokens); - tgt = radeonAppendInstructions(p, 1); + tgt = radeonAppendInstructions(t->Program, 1); tgt->Opcode = OPCODE_MUL; tgt->DstReg.File = PROGRAM_TEMPORARY; @@ -140,9 +140,9 @@ static GLboolean transform_TEX( */ if (inst.SrcReg[0].Swizzle != SWIZZLE_NOOP || inst.SrcReg[0].Abs || inst.SrcReg[0].NegateBase || inst.SrcReg[0].NegateAbs) { - int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY); + int tempreg = radeonFindFreeTemporary(t); - tgt = radeonAppendInstructions(p, 1); + tgt = radeonAppendInstructions(t->Program, 1); tgt->Opcode = OPCODE_MOV; tgt->DstReg.File = PROGRAM_TEMPORARY; @@ -157,7 +157,7 @@ static GLboolean transform_TEX( if (inst.Opcode != OPCODE_KIL) { if (inst.DstReg.File != PROGRAM_TEMPORARY || inst.DstReg.WriteMask != WRITEMASK_XYZW) { - int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY); + int tempreg = radeonFindFreeTemporary(t); inst.DstReg.File = PROGRAM_TEMPORARY; inst.DstReg.Index = tempreg; @@ -166,16 +166,16 @@ static GLboolean transform_TEX( } } - tgt = radeonAppendInstructions(p, 1); + tgt = radeonAppendInstructions(t->Program, 1); _mesa_copy_instructions(tgt, &inst, 1); if (inst.Opcode != OPCODE_KIL && - p->ShadowSamplers & (1 << inst.TexSrcUnit)) { + t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode; - int rcptemp = _mesa_find_free_register(p, PROGRAM_TEMPORARY); + int rcptemp = radeonFindFreeTemporary(t); - tgt = radeonAppendInstructions(p, 3); + tgt = radeonAppendInstructions(t->Program, 3); tgt[0].Opcode = OPCODE_RCP; tgt[0].DstReg.File = PROGRAM_TEMPORARY; @@ -222,7 +222,7 @@ static GLboolean transform_TEX( tgt[2].SrcReg[2].Swizzle = SWIZZLE_1111; } } else if (destredirect) { - tgt = radeonAppendInstructions(p, 1); + tgt = radeonAppendInstructions(t->Program, 1); tgt->Opcode = OPCODE_MOV; tgt->DstReg = orig_inst->DstReg; diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index b46e924ac7b..7ee84947225 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -38,7 +38,7 @@ * */ static GLboolean transform_TEX( - GLcontext *ctx, struct gl_program *p, + struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data) { struct r500_fragment_program_compiler *compiler = @@ -55,11 +55,11 @@ static GLboolean transform_TEX( /* ARB_shadow & EXT_shadow_funcs */ if (inst.Opcode != OPCODE_KIL && - p->ShadowSamplers & (1 << inst.TexSrcUnit)) { + t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { - tgt = radeonAppendInstructions(p, 1); + tgt = radeonAppendInstructions(t->Program, 1); tgt->Opcode = OPCODE_MOV; tgt->DstReg.File = inst.DstReg.File; @@ -71,20 +71,20 @@ static GLboolean transform_TEX( } inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = _mesa_find_free_register(p, PROGRAM_TEMPORARY); + inst.DstReg.Index = radeonFindFreeTemporary(t); inst.DstReg.WriteMask = WRITEMASK_XYZW; } - tgt = radeonAppendInstructions(p, 1); + tgt = radeonAppendInstructions(t->Program, 1); _mesa_copy_instructions(tgt, &inst, 1); if (inst.Opcode != OPCODE_KIL && - p->ShadowSamplers & (1 << inst.TexSrcUnit)) { + t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) { GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode; - int rcptemp = _mesa_find_free_register(p, PROGRAM_TEMPORARY); + int rcptemp = radeonFindFreeTemporary(t); - tgt = radeonAppendInstructions(p, 3); + tgt = radeonAppendInstructions(t->Program, 3); tgt[0].Opcode = OPCODE_RCP; tgt[0].DstReg.File = PROGRAM_TEMPORARY; @@ -131,7 +131,7 @@ static GLboolean transform_TEX( tgt[2].SrcReg[2].Swizzle = SWIZZLE_1111; } } else if (destredirect) { - tgt = radeonAppendInstructions(p, 1); + tgt = radeonAppendInstructions(t->Program, 1); tgt->Opcode = OPCODE_MOV; tgt->DstReg = orig_inst->DstReg; diff --git a/src/mesa/drivers/dri/r300/radeon_program.c b/src/mesa/drivers/dri/r300/radeon_program.c index 3112339f81c..da5e7aefce5 100644 --- a/src/mesa/drivers/dri/r300/radeon_program.c +++ b/src/mesa/drivers/dri/r300/radeon_program.c @@ -46,29 +46,30 @@ * one instruction at a time. */ void radeonLocalTransform( - GLcontext *ctx, + GLcontext *Ctx, struct gl_program *program, int num_transformations, struct radeon_program_transformation* transformations) { - struct prog_instruction *source; - int numinstructions; + struct radeon_transform_context ctx; int ip; - source = program->Instructions; - numinstructions = program->NumInstructions; + ctx.Ctx = Ctx; + ctx.Program = program; + ctx.OldInstructions = program->Instructions; + ctx.OldNumInstructions = program->NumInstructions; program->Instructions = 0; program->NumInstructions = 0; - for(ip = 0; ip < numinstructions; ++ip) { - struct prog_instruction *instr = source + ip; + for(ip = 0; ip < ctx.OldNumInstructions; ++ip) { + struct prog_instruction *instr = ctx.OldInstructions + ip; int i; for(i = 0; i < num_transformations; ++i) { struct radeon_program_transformation* t = transformations + i; - if (t->function(ctx, program, instr, t->userData)) + if (t->function(&ctx, instr, t->userData)) break; } @@ -78,7 +79,40 @@ void radeonLocalTransform( } } - _mesa_free_instructions(source, numinstructions); + _mesa_free_instructions(ctx.OldInstructions, ctx.OldNumInstructions); +} + + +static void scan_instructions(GLboolean* used, const struct prog_instruction* insts, GLuint count) +{ + GLuint i; + for (i = 0; i < count; i++) { + const struct prog_instruction *inst = insts + i; + const GLuint n = _mesa_num_inst_src_regs(inst->Opcode); + GLuint k; + + for (k = 0; k < n; k++) { + if (inst->SrcReg[k].File == PROGRAM_TEMPORARY) + used[inst->SrcReg[k].Index] = GL_TRUE; + } + } +} + +GLint radeonFindFreeTemporary(struct radeon_transform_context *t) +{ + GLboolean used[MAX_PROGRAM_TEMPS]; + GLuint i; + + _mesa_memset(used, 0, sizeof(used)); + scan_instructions(used, t->Program->Instructions, t->Program->NumInstructions); + scan_instructions(used, t->OldInstructions, t->OldNumInstructions); + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + if (!used[i]) + return i; + } + + return -1; } diff --git a/src/mesa/drivers/dri/r300/radeon_program.h b/src/mesa/drivers/dri/r300/radeon_program.h index 012104fa5ae..ba76bc47cfb 100644 --- a/src/mesa/drivers/dri/r300/radeon_program.h +++ b/src/mesa/drivers/dri/r300/radeon_program.h @@ -48,6 +48,19 @@ enum { #define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO) #define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE) +/** + * Transformation context that is passed to local transformations. + * + * Care must be taken with some operations during transformation, + * e.g. finding new temporary registers must use @ref radeonFindFreeTemporary + */ +struct radeon_transform_context { + GLcontext *Ctx; + struct gl_program *Program; + struct prog_instruction *OldInstructions; + GLuint OldNumInstructions; +}; + /** * A transformation that can be passed to \ref radeonLocalTransform. * @@ -60,8 +73,7 @@ enum { */ struct radeon_program_transformation { GLboolean (*function)( - GLcontext*, - struct gl_program*, + struct radeon_transform_context*, struct prog_instruction*, void*); void *userData; @@ -73,6 +85,10 @@ void radeonLocalTransform( int num_transformations, struct radeon_program_transformation* transformations); +/** + * Find a usable free temporary register during program transformation + */ +GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx); struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count); diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/radeon_program_alu.c index 3104d07facd..d6d016d7c12 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/radeon_program_alu.c @@ -148,24 +148,24 @@ static struct prog_src_register scalar(struct prog_src_register reg) return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); } -static void transform_ABS(struct gl_program* p, +static void transform_ABS(struct radeon_transform_context* t, struct prog_instruction* inst) { struct prog_src_register src = inst->SrcReg[0]; src.Abs = 1; src.NegateBase = 0; src.NegateAbs = 0; - emit1(p, OPCODE_MOV, inst->DstReg, src); + emit1(t->Program, OPCODE_MOV, inst->DstReg, src); } -static void transform_DPH(struct gl_program* p, +static void transform_DPH(struct radeon_transform_context* t, struct prog_instruction* inst) { struct prog_src_register src0 = inst->SrcReg[0]; if (src0.NegateAbs) { if (src0.Abs) { - int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY); - emit1(p, OPCODE_MOV, dstreg(PROGRAM_TEMPORARY, tempreg), src0); + int tempreg = radeonFindFreeTemporary(t); + emit1(t->Program, OPCODE_MOV, dstreg(PROGRAM_TEMPORARY, tempreg), src0); src0 = srcreg(src0.File, src0.Index); } else { src0.NegateAbs = 0; @@ -174,70 +174,70 @@ static void transform_DPH(struct gl_program* p, } set_swizzle(&src0, 3, SWIZZLE_ONE); set_negate_base(&src0, 3, 0); - emit2(p, OPCODE_DP4, inst->DstReg, src0, inst->SrcReg[1]); + emit2(t->Program, OPCODE_DP4, inst->DstReg, src0, inst->SrcReg[1]); } -static void transform_FLR(struct gl_program* p, +static void transform_FLR(struct radeon_transform_context* t, struct prog_instruction* inst) { - int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY); - emit1(p, OPCODE_FRC, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]); - emit2(p, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); + int tempreg = radeonFindFreeTemporary(t); + emit1(t->Program, OPCODE_FRC, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]); + emit2(t->Program, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); } -static void transform_POW(struct gl_program* p, +static void transform_POW(struct radeon_transform_context* t, struct prog_instruction* inst) { - int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY); + int tempreg = radeonFindFreeTemporary(t); struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg); struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg); tempdst.WriteMask = WRITEMASK_W; tempsrc.Swizzle = SWIZZLE_WWWW; - emit1(p, OPCODE_LG2, tempdst, scalar(inst->SrcReg[0])); - emit2(p, OPCODE_MUL, tempdst, tempsrc, scalar(inst->SrcReg[1])); - emit1(p, OPCODE_EX2, inst->DstReg, tempsrc); + emit1(t->Program, OPCODE_LG2, tempdst, scalar(inst->SrcReg[0])); + emit2(t->Program, OPCODE_MUL, tempdst, tempsrc, scalar(inst->SrcReg[1])); + emit1(t->Program, OPCODE_EX2, inst->DstReg, tempsrc); } -static void transform_SGE(struct gl_program* p, +static void transform_SGE(struct radeon_transform_context* t, struct prog_instruction* inst) { - int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY); + int tempreg = radeonFindFreeTemporary(t); - emit2(p, OPCODE_ADD, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); - emit3(p, OPCODE_CMP, inst->DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one); + emit2(t->Program, OPCODE_ADD, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); + emit3(t->Program, OPCODE_CMP, inst->DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one); } -static void transform_SLT(struct gl_program* p, +static void transform_SLT(struct radeon_transform_context* t, struct prog_instruction* inst) { - int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY); + int tempreg = radeonFindFreeTemporary(t); - emit2(p, OPCODE_ADD, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); - emit3(p, OPCODE_CMP, inst->DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero); + emit2(t->Program, OPCODE_ADD, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); + emit3(t->Program, OPCODE_CMP, inst->DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero); } -static void transform_SUB(struct gl_program* p, +static void transform_SUB(struct radeon_transform_context* t, struct prog_instruction* inst) { - emit2(p, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1])); + emit2(t->Program, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1])); } -static void transform_SWZ(struct gl_program* p, +static void transform_SWZ(struct radeon_transform_context* t, struct prog_instruction* inst) { - emit1(p, OPCODE_MOV, inst->DstReg, inst->SrcReg[0]); + emit1(t->Program, OPCODE_MOV, inst->DstReg, inst->SrcReg[0]); } -static void transform_XPD(struct gl_program* p, +static void transform_XPD(struct radeon_transform_context* t, struct prog_instruction* inst) { - int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY); + int tempreg = radeonFindFreeTemporary(t); - emit2(p, OPCODE_MUL, dstreg(PROGRAM_TEMPORARY, tempreg), + emit2(t->Program, OPCODE_MUL, dstreg(PROGRAM_TEMPORARY, tempreg), swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W)); - emit3(p, OPCODE_MAD, inst->DstReg, + emit3(t->Program, OPCODE_MAD, inst->DstReg, swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W), swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), negate(srcreg(PROGRAM_TEMPORARY, tempreg))); @@ -257,22 +257,20 @@ static void transform_XPD(struct gl_program* p, * * @todo add LIT here as well? */ -GLboolean radeonTransformALU( - GLcontext* ctx, - struct gl_program* prog, +GLboolean radeonTransformALU(struct radeon_transform_context* t, struct prog_instruction* inst, void* unused) { switch(inst->Opcode) { - case OPCODE_ABS: transform_ABS(prog, inst); return GL_TRUE; - case OPCODE_DPH: transform_DPH(prog, inst); return GL_TRUE; - case OPCODE_FLR: transform_FLR(prog, inst); return GL_TRUE; - case OPCODE_POW: transform_POW(prog, inst); return GL_TRUE; - case OPCODE_SGE: transform_SGE(prog, inst); return GL_TRUE; - case OPCODE_SLT: transform_SLT(prog, inst); return GL_TRUE; - case OPCODE_SUB: transform_SUB(prog, inst); return GL_TRUE; - case OPCODE_SWZ: transform_SWZ(prog, inst); return GL_TRUE; - case OPCODE_XPD: transform_XPD(prog, inst); return GL_TRUE; + case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE; + case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE; + case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE; + case OPCODE_POW: transform_POW(t, inst); return GL_TRUE; + case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE; + case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE; + case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE; + case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE; + case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE; default: return GL_FALSE; } diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.h b/src/mesa/drivers/dri/r300/radeon_program_alu.h index f5beb9f8c30..858c5ed0b8c 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_alu.h +++ b/src/mesa/drivers/dri/r300/radeon_program_alu.h @@ -31,8 +31,7 @@ #include "radeon_program.h" GLboolean radeonTransformALU( - GLcontext*, - struct gl_program*, + struct radeon_transform_context *t, struct prog_instruction*, void*); -- 2.30.2