r300: Correctly scan for used temporary registers
author Nicolai Haehnle <nhaehnle@gmail.com>
Sat, 5 Jul 2008 18:01:20 +0000 (20:01 +0200)
committer Nicolai Haehnle <nhaehnle@gmail.com>
Sat, 5 Jul 2008 18:01:20 +0000 (20:01 +0200)
This fixes a regression introduced by dea8719f0...

src/mesa/drivers/dri/r300/r300_fragprog.c
src/mesa/drivers/dri/r300/r500_fragprog.c
src/mesa/drivers/dri/r300/radeon_program.c
src/mesa/drivers/dri/r300/radeon_program.h
src/mesa/drivers/dri/r300/radeon_program_alu.c
src/mesa/drivers/dri/r300/radeon_program_alu.h

index 8c49e8ada683befde2ba9e08e8494625ca64f6f4..6a8ef0ef5fc3be5549d75b0b4c9291f47bd3ebc1 100644 (file)
@@ -68,7 +68,7 @@ static void reset_srcreg(struct prog_src_register* reg)
  * be reused.
  */
 static GLboolean transform_TEX(
-       GLcontext *ctx, struct gl_program *p,
+       struct radeon_transform_context *t,
        struct prog_instruction* orig_inst, void* data)
 {
        struct r300_fragment_program_compiler *compiler =
@@ -84,11 +84,11 @@ static GLboolean transform_TEX(
                return GL_FALSE;
 
        if (inst.Opcode != OPCODE_KIL &&
-           p->ShadowSamplers & (1 << inst.TexSrcUnit)) {
+           t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
                GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
 
                if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
-                       tgt = radeonAppendInstructions(p, 1);
+                       tgt = radeonAppendInstructions(t->Program, 1);
 
                        tgt->Opcode = OPCODE_MOV;
                        tgt->DstReg = inst.DstReg;
@@ -98,7 +98,7 @@ static GLboolean transform_TEX(
                }
 
                inst.DstReg.File = PROGRAM_TEMPORARY;
-               inst.DstReg.Index = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+               inst.DstReg.Index = radeonFindFreeTemporary(t);
                inst.DstReg.WriteMask = WRITEMASK_XYZW;
        }
 
@@ -113,7 +113,7 @@ static GLboolean transform_TEX(
                        0
                };
 
-               int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+               int tempreg = radeonFindFreeTemporary(t);
                int factor_index;
 
                tokens[2] = inst.TexSrcUnit;
@@ -121,7 +121,7 @@ static GLboolean transform_TEX(
                        _mesa_add_state_reference(
                                compiler->fp->mesa_program.Base.Parameters, tokens);
 
-               tgt = radeonAppendInstructions(p, 1);
+               tgt = radeonAppendInstructions(t->Program, 1);
 
                tgt->Opcode = OPCODE_MUL;
                tgt->DstReg.File = PROGRAM_TEMPORARY;
@@ -140,9 +140,9 @@ static GLboolean transform_TEX(
         */
        if (inst.SrcReg[0].Swizzle != SWIZZLE_NOOP ||
            inst.SrcReg[0].Abs || inst.SrcReg[0].NegateBase || inst.SrcReg[0].NegateAbs) {
-               int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+               int tempreg = radeonFindFreeTemporary(t);
 
-               tgt = radeonAppendInstructions(p, 1);
+               tgt = radeonAppendInstructions(t->Program, 1);
 
                tgt->Opcode = OPCODE_MOV;
                tgt->DstReg.File = PROGRAM_TEMPORARY;
@@ -157,7 +157,7 @@ static GLboolean transform_TEX(
        if (inst.Opcode != OPCODE_KIL) {
                if (inst.DstReg.File != PROGRAM_TEMPORARY ||
                    inst.DstReg.WriteMask != WRITEMASK_XYZW) {
-                       int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+                       int tempreg = radeonFindFreeTemporary(t);
 
                        inst.DstReg.File = PROGRAM_TEMPORARY;
                        inst.DstReg.Index = tempreg;
@@ -166,16 +166,16 @@ static GLboolean transform_TEX(
                }
        }
 
-       tgt = radeonAppendInstructions(p, 1);
+       tgt = radeonAppendInstructions(t->Program, 1);
        _mesa_copy_instructions(tgt, &inst, 1);
 
        if (inst.Opcode != OPCODE_KIL &&
-           p->ShadowSamplers & (1 << inst.TexSrcUnit)) {
+           t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
                GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
                GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode;
-               int rcptemp = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+               int rcptemp = radeonFindFreeTemporary(t);
 
-               tgt = radeonAppendInstructions(p, 3);
+               tgt = radeonAppendInstructions(t->Program, 3);
 
                tgt[0].Opcode = OPCODE_RCP;
                tgt[0].DstReg.File = PROGRAM_TEMPORARY;
@@ -222,7 +222,7 @@ static GLboolean transform_TEX(
                        tgt[2].SrcReg[2].Swizzle = SWIZZLE_1111;
                }
        } else if (destredirect) {
-               tgt = radeonAppendInstructions(p, 1);
+               tgt = radeonAppendInstructions(t->Program, 1);
 
                tgt->Opcode = OPCODE_MOV;
                tgt->DstReg = orig_inst->DstReg;
index b46e924ac7b71544778b7d69daee71edceeb4f1e..7ee84947225c7ec02af919545b77d3510ef3135d 100644 (file)
@@ -38,7 +38,7 @@
  *
  */
 static GLboolean transform_TEX(
-       GLcontext *ctx, struct gl_program *p,
+       struct radeon_transform_context *t,
        struct prog_instruction* orig_inst, void* data)
 {
        struct r500_fragment_program_compiler *compiler =
@@ -55,11 +55,11 @@ static GLboolean transform_TEX(
 
        /* ARB_shadow & EXT_shadow_funcs */
        if (inst.Opcode != OPCODE_KIL &&
-           p->ShadowSamplers & (1 << inst.TexSrcUnit)) {
+           t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
                GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
 
                if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
-                       tgt = radeonAppendInstructions(p, 1);
+                       tgt = radeonAppendInstructions(t->Program, 1);
 
                        tgt->Opcode = OPCODE_MOV;
                        tgt->DstReg.File = inst.DstReg.File;
@@ -71,20 +71,20 @@ static GLboolean transform_TEX(
                }
 
                inst.DstReg.File = PROGRAM_TEMPORARY;
-               inst.DstReg.Index = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+               inst.DstReg.Index = radeonFindFreeTemporary(t);
                inst.DstReg.WriteMask = WRITEMASK_XYZW;
        }
 
-       tgt = radeonAppendInstructions(p, 1);
+       tgt = radeonAppendInstructions(t->Program, 1);
        _mesa_copy_instructions(tgt, &inst, 1);
 
        if (inst.Opcode != OPCODE_KIL &&
-           p->ShadowSamplers & (1 << inst.TexSrcUnit)) {
+           t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
                GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
                GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode;
-               int rcptemp = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+               int rcptemp = radeonFindFreeTemporary(t);
 
-               tgt = radeonAppendInstructions(p, 3);
+               tgt = radeonAppendInstructions(t->Program, 3);
 
                tgt[0].Opcode = OPCODE_RCP;
                tgt[0].DstReg.File = PROGRAM_TEMPORARY;
@@ -131,7 +131,7 @@ static GLboolean transform_TEX(
                        tgt[2].SrcReg[2].Swizzle = SWIZZLE_1111;
                }
        } else if (destredirect) {
-               tgt = radeonAppendInstructions(p, 1);
+               tgt = radeonAppendInstructions(t->Program, 1);
 
                tgt->Opcode = OPCODE_MOV;
                tgt->DstReg = orig_inst->DstReg;
index 3112339f81c7010542b57ec0c63d90eb634965a8..da5e7aefce5f4889008b5763b2242cb9bdda454d 100644 (file)
  * one instruction at a time.
  */
 void radeonLocalTransform(
-       GLcontext *ctx,
+       GLcontext *Ctx,
        struct gl_program *program,
        int num_transformations,
        struct radeon_program_transformation* transformations)
 {
-       struct prog_instruction *source;
-       int numinstructions;
+       struct radeon_transform_context ctx;
        int ip;
 
-       source = program->Instructions;
-       numinstructions = program->NumInstructions;
+       ctx.Ctx = Ctx;
+       ctx.Program = program;
+       ctx.OldInstructions = program->Instructions;
+       ctx.OldNumInstructions = program->NumInstructions;
 
        program->Instructions = 0;
        program->NumInstructions = 0;
 
-       for(ip = 0; ip < numinstructions; ++ip) {
-               struct prog_instruction *instr = source + ip;
+       for(ip = 0; ip < ctx.OldNumInstructions; ++ip) {
+               struct prog_instruction *instr = ctx.OldInstructions + ip;
                int i;
 
                for(i = 0; i < num_transformations; ++i) {
                        struct radeon_program_transformation* t = transformations + i;
 
-                       if (t->function(ctx, program, instr, t->userData))
+                       if (t->function(&ctx, instr, t->userData))
                                break;
                }
 
@@ -78,7 +79,40 @@ void radeonLocalTransform(
                }
        }
 
-       _mesa_free_instructions(source, numinstructions);
+       _mesa_free_instructions(ctx.OldInstructions, ctx.OldNumInstructions);
+}
+
+/* Set used[idx] for each PROGRAM_TEMPORARY index read as a source operand by insts[0..count-1]. */
+static void scan_instructions(GLboolean* used, const struct prog_instruction* insts, GLuint count)
+{
+       GLuint i;
+       for (i = 0; i < count; i++) {
+               const struct prog_instruction *inst = insts + i;
+               const GLuint n = _mesa_num_inst_src_regs(inst->Opcode);
+               GLuint k;
+
+               for (k = 0; k < n; k++) {
+                       if (inst->SrcReg[k].File == PROGRAM_TEMPORARY) /* NOTE(review): DstReg is never examined, so write-only temporaries stay unmarked */
+                               used[inst->SrcReg[k].Index] = GL_TRUE; /* no bounds check; assumes Index fits in used[] -- TODO confirm */
+               }
+       }
+}
+/* Lowest temporary index not read as a source in either the new or the old instruction list; -1 if every index is marked. */
+GLint radeonFindFreeTemporary(struct radeon_transform_context *t)
+{
+       GLboolean used[MAX_PROGRAM_TEMPS];
+       GLuint i;
+
+       _mesa_memset(used, 0, sizeof(used));
+       scan_instructions(used, t->Program->Instructions, t->Program->NumInstructions); /* instructions emitted so far */
+       scan_instructions(used, t->OldInstructions, t->OldNumInstructions); /* original, pre-transform instructions */
+
+       for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
+               if (!used[i])
+                       return i;
+       }
+
+       return -1;
 }
 
 
index 012104fa5aec05afd3fe4a3e2b884b34c4e8cfe6..ba76bc47cfbcb79dd4ce64d4108f65f00985e26c 100644 (file)
@@ -48,6 +48,19 @@ enum {
 #define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO)
 #define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE)
 
+/**
+ * Transformation context that is passed to local transformations.
+ *
+ * Care must be taken with some operations during transformation,
+ * e.g. finding new temporary registers must use @ref radeonFindFreeTemporary
+ */
+struct radeon_transform_context {
+       GLcontext *Ctx; /**< GL context given to radeonLocalTransform */
+       struct gl_program *Program; /**< program being transformed; transformations append new instructions to it */
+       struct prog_instruction *OldInstructions; /**< the program's instruction array as it was before the transformation began */
+       GLuint OldNumInstructions; /**< number of entries in OldInstructions */
+};
+
 /**
  * A transformation that can be passed to \ref radeonLocalTransform.
  *
@@ -60,8 +73,7 @@ enum {
  */
 struct radeon_program_transformation {
        GLboolean (*function)(
-               GLcontext*,
-               struct gl_program*,
+               struct radeon_transform_context*,
                struct prog_instruction*,
                void*);
        void *userData;
@@ -73,6 +85,10 @@ void radeonLocalTransform(
        int num_transformations,
        struct radeon_program_transformation* transformations);
 
+/**
+ * Find a usable free temporary register during program transformation
+ */
+GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx);
 
 struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count);
 
index 3104d07facdacf8bdc4aaa6a27e9e1eb68bdb87b..d6d016d7c12c6e47de4d7d8b380a5346cbe24368 100644 (file)
@@ -148,24 +148,24 @@ static struct prog_src_register scalar(struct prog_src_register reg)
        return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
 }
 
-static void transform_ABS(struct gl_program* p,
+static void transform_ABS(struct radeon_transform_context* t,
        struct prog_instruction* inst)
 {
        struct prog_src_register src = inst->SrcReg[0];
        src.Abs = 1;
        src.NegateBase = 0;
        src.NegateAbs = 0;
-       emit1(p, OPCODE_MOV, inst->DstReg, src);
+       emit1(t->Program, OPCODE_MOV, inst->DstReg, src);
 }
 
-static void transform_DPH(struct gl_program* p,
+static void transform_DPH(struct radeon_transform_context* t,
        struct prog_instruction* inst)
 {
        struct prog_src_register src0 = inst->SrcReg[0];
        if (src0.NegateAbs) {
                if (src0.Abs) {
-                       int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
-                       emit1(p, OPCODE_MOV, dstreg(PROGRAM_TEMPORARY, tempreg), src0);
+                       int tempreg = radeonFindFreeTemporary(t);
+                       emit1(t->Program, OPCODE_MOV, dstreg(PROGRAM_TEMPORARY, tempreg), src0);
                        src0 = srcreg(src0.File, src0.Index);
                } else {
                        src0.NegateAbs = 0;
@@ -174,70 +174,70 @@ static void transform_DPH(struct gl_program* p,
        }
        set_swizzle(&src0, 3, SWIZZLE_ONE);
        set_negate_base(&src0, 3, 0);
-       emit2(p, OPCODE_DP4, inst->DstReg, src0, inst->SrcReg[1]);
+       emit2(t->Program, OPCODE_DP4, inst->DstReg, src0, inst->SrcReg[1]);
 }
 
-static void transform_FLR(struct gl_program* p,
+static void transform_FLR(struct radeon_transform_context* t,
        struct prog_instruction* inst)
 {
-       int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
-       emit1(p, OPCODE_FRC, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]);
-       emit2(p, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
+       int tempreg = radeonFindFreeTemporary(t);
+       emit1(t->Program, OPCODE_FRC, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]);
+       emit2(t->Program, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
 }
 
-static void transform_POW(struct gl_program* p,
+static void transform_POW(struct radeon_transform_context* t,
        struct prog_instruction* inst)
 {
-       int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+       int tempreg = radeonFindFreeTemporary(t);
        struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg);
        struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg);
        tempdst.WriteMask = WRITEMASK_W;
        tempsrc.Swizzle = SWIZZLE_WWWW;
 
-       emit1(p, OPCODE_LG2, tempdst, scalar(inst->SrcReg[0]));
-       emit2(p, OPCODE_MUL, tempdst, tempsrc, scalar(inst->SrcReg[1]));
-       emit1(p, OPCODE_EX2, inst->DstReg, tempsrc);
+       emit1(t->Program, OPCODE_LG2, tempdst, scalar(inst->SrcReg[0]));
+       emit2(t->Program, OPCODE_MUL, tempdst, tempsrc, scalar(inst->SrcReg[1]));
+       emit1(t->Program, OPCODE_EX2, inst->DstReg, tempsrc);
 }
 
-static void transform_SGE(struct gl_program* p,
+static void transform_SGE(struct radeon_transform_context* t,
        struct prog_instruction* inst)
 {
-       int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+       int tempreg = radeonFindFreeTemporary(t);
 
-       emit2(p, OPCODE_ADD, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
-       emit3(p, OPCODE_CMP, inst->DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one);
+       emit2(t->Program, OPCODE_ADD, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
+       emit3(t->Program, OPCODE_CMP, inst->DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one);
 }
 
-static void transform_SLT(struct gl_program* p,
+static void transform_SLT(struct radeon_transform_context* t,
        struct prog_instruction* inst)
 {
-       int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+       int tempreg = radeonFindFreeTemporary(t);
 
-       emit2(p, OPCODE_ADD, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
-       emit3(p, OPCODE_CMP, inst->DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero);
+       emit2(t->Program, OPCODE_ADD, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
+       emit3(t->Program, OPCODE_CMP, inst->DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero);
 }
 
-static void transform_SUB(struct gl_program* p,
+static void transform_SUB(struct radeon_transform_context* t,
        struct prog_instruction* inst)
 {
-       emit2(p, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1]));
+       emit2(t->Program, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1]));
 }
 
-static void transform_SWZ(struct gl_program* p,
+static void transform_SWZ(struct radeon_transform_context* t,
        struct prog_instruction* inst)
 {
-       emit1(p, OPCODE_MOV, inst->DstReg, inst->SrcReg[0]);
+       emit1(t->Program, OPCODE_MOV, inst->DstReg, inst->SrcReg[0]);
 }
 
-static void transform_XPD(struct gl_program* p,
+static void transform_XPD(struct radeon_transform_context* t,
        struct prog_instruction* inst)
 {
-       int tempreg = _mesa_find_free_register(p, PROGRAM_TEMPORARY);
+       int tempreg = radeonFindFreeTemporary(t);
 
-       emit2(p, OPCODE_MUL, dstreg(PROGRAM_TEMPORARY, tempreg),
+       emit2(t->Program, OPCODE_MUL, dstreg(PROGRAM_TEMPORARY, tempreg),
                swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
                swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W));
-       emit3(p, OPCODE_MAD, inst->DstReg,
+       emit3(t->Program, OPCODE_MAD, inst->DstReg,
                swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W),
                swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
                negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
@@ -257,22 +257,20 @@ static void transform_XPD(struct gl_program* p,
  *
  * @todo add LIT here as well?
  */
-GLboolean radeonTransformALU(
-       GLcontext* ctx,
-       struct gl_program* prog,
+GLboolean radeonTransformALU(struct radeon_transform_context* t,
        struct prog_instruction* inst,
        void* unused)
 {
        switch(inst->Opcode) {
-       case OPCODE_ABS: transform_ABS(prog, inst); return GL_TRUE;
-       case OPCODE_DPH: transform_DPH(prog, inst); return GL_TRUE;
-       case OPCODE_FLR: transform_FLR(prog, inst); return GL_TRUE;
-       case OPCODE_POW: transform_POW(prog, inst); return GL_TRUE;
-       case OPCODE_SGE: transform_SGE(prog, inst); return GL_TRUE;
-       case OPCODE_SLT: transform_SLT(prog, inst); return GL_TRUE;
-       case OPCODE_SUB: transform_SUB(prog, inst); return GL_TRUE;
-       case OPCODE_SWZ: transform_SWZ(prog, inst); return GL_TRUE;
-       case OPCODE_XPD: transform_XPD(prog, inst); return GL_TRUE;
+       case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE;
+       case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE;
+       case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE;
+       case OPCODE_POW: transform_POW(t, inst); return GL_TRUE;
+       case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE;
+       case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE;
+       case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE;
+       case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE;
+       case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE;
        default:
                return GL_FALSE;
        }
index f5beb9f8c3065de710094e7a9bcc4e0b7c59940c..858c5ed0b8c8f9d83bc0862d1e484067ad6148d8 100644 (file)
@@ -31,8 +31,7 @@
 #include "radeon_program.h"
 
 GLboolean radeonTransformALU(
-       GLcontext*,
-       struct gl_program*,
+       struct radeon_transform_context *t,
        struct prog_instruction*,
        void*);