r500: Add "Not quite SSA" and dead code elimination pass
author Nicolai Haehnle <nhaehnle@gmail.com>
Sun, 6 Jul 2008 17:48:50 +0000 (19:48 +0200)
committer Nicolai Haehnle <nhaehnle@gmail.com>
Sat, 12 Jul 2008 07:36:02 +0000 (09:36 +0200)
In addition, this pass fixes non-native swizzles.

src/mesa/drivers/dri/r300/Makefile
src/mesa/drivers/dri/r300/r500_fragprog.c
src/mesa/drivers/dri/r300/r500_fragprog_emit.c
src/mesa/drivers/dri/r300/radeon_nqssadce.c [new file with mode: 0644]
src/mesa/drivers/dri/r300/radeon_nqssadce.h [new file with mode: 0644]
src/mesa/shader/program.c
src/mesa/shader/program.h

index d52b2b4c36ddd7102e813a53213b60a367d2e710..1dc75a30625a614a2d980fa0e9865c719b23d846 100644 (file)
@@ -38,6 +38,7 @@ DRIVER_SOURCES = \
                 r300_texstate.c \
                 radeon_program.c \
                 radeon_program_alu.c \
+                radeon_nqssadce.c \
                 r300_vertprog.c \
                 r300_fragprog.c \
                 r300_fragprog_emit.c \
index 9bb92d3ba419d9576f80bc9684f89647f8bdc0fa..c92ea8f5e6d7eea1962643ef0114e1f5017e49f1 100644 (file)
@@ -27,6 +27,7 @@
 
 #include "r500_fragprog.h"
 
+#include "radeon_nqssadce.h"
 #include "radeon_program_alu.h"
 
 
@@ -250,6 +251,57 @@ static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler)
 }
 
 
+static void nqssadce_init(struct nqssadce_state* s)
+{
+       s->Outputs[FRAG_RESULT_COLR].Sourced = WRITEMASK_XYZW;
+       s->Outputs[FRAG_RESULT_DEPR].Sourced = WRITEMASK_W;
+}
+
+static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg)
+{
+       GLuint relevant;
+       int i;
+
+       if (reg.Abs)
+               return GL_TRUE;
+
+       relevant = 0;
+       for(i = 0; i < 3; ++i) {
+               GLuint swz = GET_SWZ(reg.Swizzle, i);
+               if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO)
+                       relevant |= 1 << i;
+       }
+       if ((reg.NegateBase & relevant) && ((reg.NegateBase & relevant) != relevant))
+               return GL_FALSE;
+
+       return GL_TRUE;
+}
+
+/**
+ * Implement a non-native swizzle. This function assumes that
+ * is_native_swizzle returned false for the given source register.
+ */
+static void nqssadce_build_swizzle(struct nqssadce_state *s,
+       struct prog_dst_register dst, struct prog_src_register src)
+{
+       struct prog_instruction *inst;
+
+       _mesa_insert_instructions(s->Program, s->IP, 2);
+       inst = s->Program->Instructions + s->IP;
+
+       inst[0].Opcode = OPCODE_MOV;
+       inst[0].DstReg = dst;
+       inst[0].DstReg.WriteMask &= src.NegateBase;
+       inst[0].SrcReg[0] = src;
+
+       inst[1].Opcode = OPCODE_MOV;
+       inst[1].DstReg = dst;
+       inst[1].DstReg.WriteMask &= ~src.NegateBase;
+       inst[1].SrcReg[0] = src;
+
+       s->IP += 2;
+}
+
 static GLuint build_dtm(GLuint depthmode)
 {
        switch(depthmode) {
@@ -327,7 +379,20 @@ void r500TranslateFragmentShader(r300ContextPtr r300,
                        3, transformations);
 
                if (RADEON_DEBUG & DEBUG_PIXEL) {
-                       _mesa_printf("Compiler: after all transformations:\n");
+                       _mesa_printf("Compiler: after native rewrite:\n");
+                       _mesa_print_program(compiler.program);
+               }
+
+               struct radeon_nqssadce_descr nqssadce = {
+                       .Init = &nqssadce_init,
+                       .IsNativeSwizzle = &is_native_swizzle,
+                       .BuildSwizzle = &nqssadce_build_swizzle,
+                       .RewriteDepthOut = GL_TRUE
+               };
+               radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce);
+
+               if (RADEON_DEBUG & DEBUG_PIXEL) {
+                       _mesa_printf("Compiler: after NqSSA-DCE:\n");
                        _mesa_print_program(compiler.program);
                }
 
index 4f658039536e38d000a6e0d7e38819345ff24408..275911679dc52fbcf90f25593f5cc7ea7e74c71c 100644 (file)
@@ -163,23 +163,30 @@ static const struct prog_dst_register dstreg_template = {
        .WriteMask = WRITEMASK_XYZW
 };
 
+static INLINE GLuint fix_hw_swizzle(GLuint swz)
+{
+       if (swz == 5) swz = 6;
+       if (swz == SWIZZLE_NIL) swz = 4;
+       return swz;
+}
+
 static INLINE GLuint make_rgb_swizzle(struct prog_src_register src) {
        GLuint swiz = 0x0;
        GLuint temp;
        /* This could be optimized, but it should be plenty fast already. */
        int i;
+       int negatebase = 0;
        for (i = 0; i < 3; i++) {
-               temp = GET_SWZ(src.Swizzle, i);
-               /* Fix SWIZZLE_ONE */
-               if (temp == 5) temp++;
+               temp = GET_SWZ(src.Swizzle, i);
+               if (temp != SWIZZLE_NIL && GET_BIT(src.NegateBase, i))
+                       negatebase = 1;
+               temp = fix_hw_swizzle(temp);
                swiz |= temp << i*3;
        }
-       if (src.Abs) {
+       if (src.Abs)
                swiz |= R500_SWIZ_MOD_ABS << 9;
-       } else if (src.NegateBase & 7) {
-               ASSERT((src.NegateBase & 7) == 7);
+       else if (negatebase)
                swiz |= R500_SWIZ_MOD_NEG << 9;
-       }
        if (src.NegateAbs)
                swiz ^= R500_SWIZ_MOD_NEG << 9;
        return swiz;
@@ -191,8 +198,7 @@ static INLINE GLuint make_rgba_swizzle(GLuint src) {
        int i;
        for (i = 0; i < 4; i++) {
                temp = GET_SWZ(src, i);
-               /* Fix SWIZZLE_ONE */
-               if (temp == 5) temp++;
+               temp = fix_hw_swizzle(temp);
                swiz |= temp << i*3;
        }
        return swiz;
@@ -201,7 +207,7 @@ static INLINE GLuint make_rgba_swizzle(GLuint src) {
 static INLINE GLuint make_alpha_swizzle(struct prog_src_register src) {
        GLuint swiz = GET_SWZ(src.Swizzle, 3);
 
-       if (swiz == 5) swiz++;
+       swiz = fix_hw_swizzle(swiz);
 
        if (src.Abs) {
                swiz |= R500_SWIZ_MOD_ABS << 3;
@@ -217,7 +223,7 @@ static INLINE GLuint make_alpha_swizzle(struct prog_src_register src) {
 static INLINE GLuint make_sop_swizzle(struct prog_src_register src) {
        GLuint swiz = GET_SWZ(src.Swizzle, 0);
 
-       if (swiz == 5) swiz++;
+       swiz = fix_hw_swizzle(swiz);
 
        if (src.Abs) {
                swiz |= R500_SWIZ_MOD_ABS << 3;
diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.c b/src/mesa/drivers/dri/r300/radeon_nqssadce.c
new file mode 100644 (file)
index 0000000..f10ba40
--- /dev/null
@@ -0,0 +1,282 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ *
+ * "Not-quite SSA" and Dead-Code Elimination.
+ *
+ * @note This code uses SWIZZLE_NIL in a source register to indicate that
+ * the corresponding component is ignored by the corresponding instruction.
+ */
+
+#include "radeon_nqssadce.h"
+
+
+/**
+ * Return the @ref register_state for the given register (or 0 for untracked
+ * registers, i.e. anything that is neither a temporary nor an output).
+ */
+static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index)
+{
+       switch(file) {
+       case PROGRAM_TEMPORARY: return &s->Temps[index];
+       case PROGRAM_OUTPUT: return &s->Outputs[index];
+       default: return 0;
+       }
+}
+
+
+/**
+ * Left multiplication of a register with a swizzle
+ *
+ * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles.
+ */
+static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
+{
+       struct prog_src_register tmp = srcreg;
+       int i;
+       tmp.Swizzle = 0;
+       tmp.NegateBase = 0;
+       for(i = 0; i < 4; ++i) {
+               GLuint swz = GET_SWZ(swizzle, i);
+               if (swz < 4) {
+                       tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
+                       tmp.NegateBase |= GET_BIT(srcreg.NegateBase, swz) << i;
+               } else {
+                       tmp.Swizzle |= swz << (i*3);
+               }
+       }
+       return tmp;
+}
+
+
+static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
+       struct prog_instruction *inst, GLint src, GLuint sourced)
+{
+       int i;
+       GLuint deswz_source = 0;
+
+       for(i = 0; i < 4; ++i) {
+               if (GET_BIT(sourced, i)) {
+                       GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i);
+                       deswz_source |= 1 << swz;
+               } else {
+                       inst->SrcReg[src].Swizzle &= ~(7 << (3*i));
+                       inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
+               }
+       }
+
+       if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) {
+               struct prog_dst_register dstreg = inst->DstReg;
+               dstreg.File = PROGRAM_TEMPORARY;
+               dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
+               dstreg.WriteMask = sourced;
+
+               s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]);
+
+               inst = s->Program->Instructions + s->IP;
+               inst->SrcReg[src].File = PROGRAM_TEMPORARY;
+               inst->SrcReg[src].Index = dstreg.Index;
+               inst->SrcReg[src].Swizzle = 0;
+               inst->SrcReg[src].NegateBase = 0;
+               inst->SrcReg[src].Abs = 0;
+               inst->SrcReg[src].NegateAbs = 0;
+               for(i = 0; i < 4; ++i) {
+                       if (GET_BIT(sourced, i))
+                               inst->SrcReg[src].Swizzle |= i << (3*i);
+                       else
+                               inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
+               }
+               deswz_source = sourced;
+       }
+
+       struct register_state *regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
+       if (regstate)
+               regstate->Sourced |= deswz_source & 0xf;
+
+       return inst;
+}
+
+
+static void rewrite_depth_out(struct prog_instruction *inst)
+{
+       if (inst->DstReg.WriteMask & WRITEMASK_Z) {
+               inst->DstReg.WriteMask = WRITEMASK_W;
+       } else {
+               inst->DstReg.WriteMask = 0;
+               return;
+       }
+
+       switch (inst->Opcode) {
+       case OPCODE_FRC:
+       case OPCODE_MOV:
+               inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+               break;
+       case OPCODE_ADD:
+       case OPCODE_MAX:
+       case OPCODE_MIN:
+       case OPCODE_MUL:
+               inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+               inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
+               break;
+       case OPCODE_CMP:
+       case OPCODE_MAD:
+               inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
+               inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
+               inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]);
+               break;
+       default:
+               // Scalar instructions needn't be reswizzled
+               break;
+       }
+}
+
+static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex)
+{
+       int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
+       int i;
+       for(i = 0; i < nsrc; ++i)
+               if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex)
+                       inst->SrcReg[i].Index = newindex;
+}
+
+static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
+{
+       GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
+       int ip;
+       for(ip = 0; ip < s->IP; ++ip) {
+               struct prog_instruction* inst = s->Program->Instructions + ip;
+               if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex)
+                       inst->DstReg.Index = newindex;
+               unalias_srcregs(inst, oldindex, newindex);
+       }
+       unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex);
+}
+
+
+/**
+ * Handle one instruction.
+ */
+static void process_instruction(struct nqssadce_state* s)
+{
+       struct prog_instruction *inst = s->Program->Instructions + s->IP;
+
+       if (inst->Opcode == OPCODE_END)
+               return;
+
+       if (inst->Opcode != OPCODE_KIL) {
+               if (s->Descr->RewriteDepthOut) {
+                       if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPR)
+                               rewrite_depth_out(inst);
+               }
+
+               struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
+               if (!regstate) {
+                       _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n",
+                               inst->DstReg.File, inst->DstReg.Index);
+                       return;
+               }
+
+               inst->DstReg.WriteMask &= regstate->Sourced;
+               regstate->Sourced &= ~inst->DstReg.WriteMask;
+
+               if (inst->DstReg.WriteMask == 0) {
+                       _mesa_delete_instructions(s->Program, s->IP, 1);
+                       return;
+               }
+
+               if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced)
+                       unalias_temporary(s, inst->DstReg.Index);
+       }
+
+       /* Attention: Due to swizzle emulation code, the following
+        * might change the instruction stream under us, so we have
+        * to be careful with the inst pointer. */
+       switch (inst->Opcode) {
+       case OPCODE_FRC:
+       case OPCODE_MOV:
+               inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
+               break;
+       case OPCODE_ADD:
+       case OPCODE_MAX:
+       case OPCODE_MIN:
+       case OPCODE_MUL:
+               inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
+               inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
+               break;
+       case OPCODE_CMP:
+       case OPCODE_MAD:
+               inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
+               inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
+               inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask);
+               break;
+       case OPCODE_COS:
+       case OPCODE_EX2:
+       case OPCODE_LG2:
+       case OPCODE_RCP:
+       case OPCODE_RSQ:
+       case OPCODE_SIN:
+               inst = track_used_srcreg(s, inst, 0, 0x1);
+               break;
+       case OPCODE_DP3:
+               inst = track_used_srcreg(s, inst, 0, 0x7);
+               inst = track_used_srcreg(s, inst, 1, 0x7);
+               break;
+       case OPCODE_DP4:
+               inst = track_used_srcreg(s, inst, 0, 0xf);
+               inst = track_used_srcreg(s, inst, 1, 0xf);
+               break;
+       case OPCODE_KIL:
+       case OPCODE_TEX:
+       case OPCODE_TXB:
+       case OPCODE_TXP:
+               inst = track_used_srcreg(s, inst, 0, 0xf);
+               break;
+       default:
+               _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
+               return;
+       }
+}
+
+
+void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr)
+{
+       struct nqssadce_state s;
+
+       _mesa_bzero(&s, sizeof(s));
+       s.Ctx = ctx;
+       s.Program = p;
+       s.Descr = descr;
+       s.Descr->Init(&s);
+       s.IP = p->NumInstructions;
+
+       while(s.IP > 0) {
+               s.IP--;
+               process_instruction(&s);
+       }
+}
diff --git a/src/mesa/drivers/dri/r300/radeon_nqssadce.h b/src/mesa/drivers/dri/r300/radeon_nqssadce.h
new file mode 100644 (file)
index 0000000..a4f94ab
--- /dev/null
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_NQSSADCE_H_
+#define __RADEON_PROGRAM_NQSSADCE_H_
+
+#include "radeon_program.h"
+
+
+struct register_state {
+       /**
+        * Bitmask indicating which components of the register are sourced
+        * by later instructions.
+        */
+       GLuint Sourced : 4;
+};
+
+/**
+ * Maintain state such as which registers are used, which registers are
+ * read from, etc.
+ */
+struct nqssadce_state {
+       GLcontext *Ctx;
+       struct gl_program *Program;
+       struct radeon_nqssadce_descr *Descr;
+
+       /**
+        * All instructions after this instruction pointer have been dealt with.
+        */
+       int IP;
+
+       /**
+        * Which registers are read by subsequent instructions?
+        */
+       struct register_state Temps[MAX_PROGRAM_TEMPS];
+       struct register_state Outputs[VERT_RESULT_MAX];
+};
+
+
+/**
+ * This structure contains a description of the hardware in-so-far as
+ * it is required for the NqSSA-DCE pass.
+ */
+struct radeon_nqssadce_descr {
+       /**
+        * Fill in which output components are sourced (Outputs[].Sourced).
+        */
+       void (*Init)(struct nqssadce_state *);
+
+       /**
+        * Check whether the given swizzle, absolute and negate combination
+        * can be implemented natively by the hardware for this opcode.
+        */
+       GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg);
+
+       /**
+        * Emit (at the current IP) the instruction MOV dst, src;
+        * The transformation will work recursively on the emitted instruction(s).
+        */
+       void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
+
+       /**
+        * Rewrite instructions that write to DEPR.z to write to DEPR.w
+        * instead (rewriting is done *before* the WriteMask test).
+        */
+       GLboolean RewriteDepthOut;
+       void *Data;
+};
+
+void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr);
+
+#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */
index 376d7ee60d4b21804cffbc493241f2c623a68c2e..a80e6e9154179ff8c90202077b893025211d4658 100644 (file)
@@ -112,7 +112,7 @@ _mesa_free_program_data(GLcontext *ctx)
 
 /**
  * Update the default program objects in the given context to reference those
- * specified in the shared state and release those referencing the old 
+ * specified in the shared state and release those referencing the old
  * shared state.
  */
 void
@@ -238,7 +238,7 @@ struct gl_program *
 _mesa_init_fragment_program( GLcontext *ctx, struct gl_fragment_program *prog,
                              GLenum target, GLuint id)
 {
-   if (prog) 
+   if (prog)
       return _mesa_init_program_struct( ctx, &prog->Base, target, id );
    else
       return NULL;
@@ -252,7 +252,7 @@ struct gl_program *
 _mesa_init_vertex_program( GLcontext *ctx, struct gl_vertex_program *prog,
                            GLenum target, GLuint id)
 {
-   if (prog) 
+   if (prog)
       return _mesa_init_program_struct( ctx, &prog->Base, target, id );
    else
       return NULL;
@@ -265,7 +265,7 @@ _mesa_init_vertex_program( GLcontext *ctx, struct gl_vertex_program *prog,
  * ctx->Driver.NewProgram.  May be overridden (ie. replaced) by a
  * device driver function to implement OO deriviation with additional
  * types not understood by this function.
- * 
+ *
  * \param ctx  context
  * \param id   program id/number
  * \param target  program target/type
@@ -309,7 +309,7 @@ _mesa_delete_program(GLcontext *ctx, struct gl_program *prog)
 
    if (prog == &_mesa_DummyProgram)
       return;
-                 
+
    if (prog->String)
       _mesa_free(prog->String);
 
@@ -382,7 +382,7 @@ _mesa_reference_program(GLcontext *ctx,
 
       deleteFlag = ((*ptr)->RefCount == 0);
       /*_glthread_UNLOCK_MUTEX((*ptr)->Mutex);*/
-      
+
       if (deleteFlag) {
          ASSERT(ctx);
          ctx->Driver.DeleteProgram(ctx, *ptr);
@@ -541,6 +541,53 @@ _mesa_insert_instructions(struct gl_program *prog, GLuint start, GLuint count)
 }
 
 
+/**
+ * Delete 'count' instructions at 'start' in the given program.
+ * Adjust branch targets accordingly.
+ */
+GLboolean
+_mesa_delete_instructions(struct gl_program *prog, GLuint start, GLuint count)
+{
+   const GLuint origLen = prog->NumInstructions;
+   const GLuint newLen = origLen - count;
+   struct prog_instruction *newInst;
+   GLuint i;
+
+   /* adjust branches */
+   for (i = 0; i < prog->NumInstructions; i++) {
+      struct prog_instruction *inst = prog->Instructions + i;
+      if (inst->BranchTarget > 0) {
+         if (inst->BranchTarget >= start) {
+            inst->BranchTarget -= count;
+         }
+      }
+   }
+
+   /* Alloc storage for new instructions */
+   newInst = _mesa_alloc_instructions(newLen);
+   if (!newInst) {
+      return GL_FALSE;
+   }
+
+   /* Copy 'start' instructions into new instruction buffer */
+   _mesa_copy_instructions(newInst, prog->Instructions, start);
+
+   /* Copy the remaining/tail instructions to new inst buffer */
+   _mesa_copy_instructions(newInst + start,
+                           prog->Instructions + start + count,
+                           newLen - start);
+
+   /* free old instructions */
+   _mesa_free_instructions(prog->Instructions, origLen);
+
+   /* install new instructions */
+   prog->Instructions = newInst;
+   prog->NumInstructions = newLen;
+
+   return GL_TRUE;
+}
+
+
 /**
  * Search instructions for registers that match (oldFile, oldIndex),
  * replacing them with (newFile, newIndex).
@@ -844,7 +891,7 @@ _mesa_BindProgram(GLenum target, GLuint id)
  * \note Not compiled into display lists.
  * \note Called by both glDeleteProgramsNV and glDeleteProgramsARB.
  */
-void GLAPIENTRY 
+void GLAPIENTRY
 _mesa_DeletePrograms(GLsizei n, const GLuint *ids)
 {
    GLint i;
index f1a69a2c016361d0e9c1fa80547f5e4cb191c9d7..48fe06ab7f36a60fc6784452fe288e5714f0900a 100644 (file)
@@ -67,13 +67,13 @@ _mesa_find_line_column(const GLubyte *string, const GLubyte *pos,
                        GLint *line, GLint *col);
 
 
-extern struct gl_program * 
-_mesa_init_vertex_program(GLcontext *ctx, 
-                          struct gl_vertex_program *prog, 
+extern struct gl_program *
+_mesa_init_vertex_program(GLcontext *ctx,
+                          struct gl_vertex_program *prog,
                           GLenum target, GLuint id);
 
-extern struct gl_program * 
-_mesa_init_fragment_program(GLcontext *ctx, 
+extern struct gl_program *
+_mesa_init_fragment_program(GLcontext *ctx,
                             struct gl_fragment_program *prog,
                             GLenum target, GLuint id);
 
@@ -115,6 +115,9 @@ _mesa_clone_program(GLcontext *ctx, const struct gl_program *prog);
 extern  GLboolean
 _mesa_insert_instructions(struct gl_program *prog, GLuint start, GLuint count);
 
+extern  GLboolean
+_mesa_delete_instructions(struct gl_program *prog, GLuint start, GLuint count);
+
 extern struct gl_program *
 _mesa_combine_programs(GLcontext *ctx,
                        const struct gl_program *progA,