r300/vertprog: Refactor wpos rewrite using rc_program
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r3xx_vertprog.c
index 400408620ea465f7c1c434d46994976fcfdbed31..c05b488645b7520091fa8167a2196a9b14877ed2 100644 (file)
@@ -28,7 +28,6 @@
 #include "radeon_program.h"
 #include "radeon_program_alu.h"
 
-#include "shader/prog_optimize.h"
 #include "shader/prog_print.h"
 
 
@@ -311,35 +310,35 @@ static void ei_pow(struct r300_vertex_program_code *vp,
        inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
 }
 
-static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_program * glvp)
+static void t_inputs_outputs(struct r300_vertex_program_compiler * c)
 {
        int i;
        int cur_reg;
        GLuint OutputsWritten, InputsRead;
 
-       OutputsWritten = glvp->OutputsWritten;
-       InputsRead = glvp->InputsRead;
+       OutputsWritten = c->Base.Program.OutputsWritten;
+       InputsRead = c->Base.Program.InputsRead;
 
        cur_reg = -1;
        for (i = 0; i < VERT_ATTRIB_MAX; i++) {
                if (InputsRead & (1 << i))
-                       vp->inputs[i] = ++cur_reg;
+                       c->code->inputs[i] = ++cur_reg;
                else
-                       vp->inputs[i] = -1;
+                       c->code->inputs[i] = -1;
        }
 
        cur_reg = 0;
        for (i = 0; i < VERT_RESULT_MAX; i++)
-               vp->outputs[i] = -1;
+               c->code->outputs[i] = -1;
 
        assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
 
        if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
-               vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
+               c->code->outputs[VERT_RESULT_HPOS] = cur_reg++;
        }
 
        if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
-               vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
+               c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++;
        }
 
        /* If we're writing back facing colors we need to send
@@ -349,58 +348,62 @@ static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_prog
         * get written into appropriate output vectors.
         */
        if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
-               vp->outputs[VERT_RESULT_COL0] = cur_reg++;
+               c->code->outputs[VERT_RESULT_COL0] = cur_reg++;
        } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
                OutputsWritten & (1 << VERT_RESULT_BFC1)) {
                cur_reg++;
        }
 
        if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
-               vp->outputs[VERT_RESULT_COL1] = cur_reg++;
+               c->code->outputs[VERT_RESULT_COL1] = cur_reg++;
        } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
                OutputsWritten & (1 << VERT_RESULT_BFC1)) {
                cur_reg++;
        }
 
        if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
-               vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
+               c->code->outputs[VERT_RESULT_BFC0] = cur_reg++;
        } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
                cur_reg++;
        }
 
        if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
-               vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
+               c->code->outputs[VERT_RESULT_BFC1] = cur_reg++;
        } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
                cur_reg++;
        }
 
        for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
                if (OutputsWritten & (1 << i)) {
-                       vp->outputs[i] = cur_reg++;
+                       c->code->outputs[i] = cur_reg++;
                }
        }
 
        if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
-               vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
+               c->code->outputs[VERT_RESULT_FOGC] = cur_reg++;
        }
 }
 
-static GLboolean translate_vertex_program(struct r300_vertex_program_compiler * compiler)
+static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
 {
-       struct prog_instruction *vpi = compiler->program->Instructions;
-       GLuint *inst;
+       struct rc_instruction *rci;
 
        compiler->code->pos_end = 0;    /* Not supported yet */
        compiler->code->length = 0;
 
-       t_inputs_outputs(compiler->code, compiler->program);
+       t_inputs_outputs(compiler);
+
+       for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
+               struct prog_instruction *vpi = &rci->I;
+               GLuint *inst = compiler->code->body.d + compiler->code->length;
 
-       for (inst = compiler->code->body.d; vpi->Opcode != OPCODE_END;
-            vpi++, inst += 4) {
                /* Skip instructions writing to non-existing destination */
-               if (!valid_dst(compiler->code, &vpi->DstReg)) {
-                       inst -= 4;
+               if (!valid_dst(compiler->code, &vpi->DstReg))
                        continue;
+
+               if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
+                       rc_error(&compiler->Base, "Vertex program has too many instructions\n");
+                       return;
                }
 
                switch (vpi->Opcode) {
@@ -425,244 +428,192 @@ static GLboolean translate_vertex_program(struct r300_vertex_program_compiler *
                case OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
                case OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
                default:
-                       fprintf(stderr, "Unknown opcode %i\n", vpi->Opcode);
-                       return GL_FALSE;
+                       rc_error(&compiler->Base, "Unknown opcode %i\n", vpi->Opcode);
+                       return;
                }
-       }
 
-       compiler->code->length = (inst - compiler->code->body.d);
-       if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
-               return GL_FALSE;
-       }
+               compiler->code->length += 4;
 
-       return GL_TRUE;
+               if (compiler->Base.Error)
+                       return;
+       }
 }
 
-/**
- * Vertex engine cannot read two inputs or two constants at the same time.
- * Introduce intermediate MOVs to temporary registers to account for this.
- */
-static GLboolean transform_source_conflicts(
-       struct radeon_transform_context *t,
-       struct prog_instruction* orig_inst,
-       void* unused)
-{
-       struct prog_instruction inst = *orig_inst;
-       struct prog_instruction * dst;
-       GLuint num_operands = _mesa_num_inst_src_regs(inst.Opcode);
+struct temporary_allocation {
+       GLuint Allocated:1;
+       GLuint HwTemp:15;
+       struct rc_instruction * LastRead;
+};
 
-       if (num_operands == 3) {
-               if (t_src_conflict(inst.SrcReg[1], inst.SrcReg[2])
-                   || t_src_conflict(inst.SrcReg[0], inst.SrcReg[2])) {
-                       int tmpreg = radeonFindFreeTemporary(t);
-                       struct prog_instruction * inst_mov = radeonAppendInstructions(t->Program, 1);
-                       inst_mov->Opcode = OPCODE_MOV;
-                       inst_mov->DstReg.File = PROGRAM_TEMPORARY;
-                       inst_mov->DstReg.Index = tmpreg;
-                       inst_mov->SrcReg[0] = inst.SrcReg[2];
-
-                       reset_srcreg(&inst.SrcReg[2]);
-                       inst.SrcReg[2].File = PROGRAM_TEMPORARY;
-                       inst.SrcReg[2].Index = tmpreg;
+static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
+{
+       struct rc_instruction *inst;
+       GLuint num_orig_temps = 0;
+       GLboolean hwtemps[VSF_MAX_FRAGMENT_TEMPS];
+       struct temporary_allocation * ta;
+       GLuint i, j;
+
+       compiler->code->num_temporaries = 0;
+       memset(hwtemps, 0, sizeof(hwtemps));
+
+       /* Pass 1: Count original temporaries and allocate structures */
+       for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+               GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+               GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
+
+               for (i = 0; i < numsrcs; ++i) {
+                       if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
+                               if (inst->I.SrcReg[i].Index >= num_orig_temps)
+                                       num_orig_temps = inst->I.SrcReg[i].Index + 1;
+                       }
                }
-       }
 
-       if (num_operands >= 2) {
-               if (t_src_conflict(inst.SrcReg[1], inst.SrcReg[0])) {
-                       int tmpreg = radeonFindFreeTemporary(t);
-                       struct prog_instruction * inst_mov = radeonAppendInstructions(t->Program, 1);
-                       inst_mov->Opcode = OPCODE_MOV;
-                       inst_mov->DstReg.File = PROGRAM_TEMPORARY;
-                       inst_mov->DstReg.Index = tmpreg;
-                       inst_mov->SrcReg[0] = inst.SrcReg[1];
-
-                       reset_srcreg(&inst.SrcReg[1]);
-                       inst.SrcReg[1].File = PROGRAM_TEMPORARY;
-                       inst.SrcReg[1].Index = tmpreg;
+               if (numdsts) {
+                       if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
+                               if (inst->I.DstReg.Index >= num_orig_temps)
+                                       num_orig_temps = inst->I.DstReg.Index + 1;
+                       }
                }
        }
 
-       dst = radeonAppendInstructions(t->Program, 1);
-       *dst = inst;
-       return GL_TRUE;
-}
-
-static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id)
-{
-       struct prog_instruction *vpi;
-
-       _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2);
-
-       vpi = &prog->Instructions[prog->NumInstructions - 3];
-
-       vpi->Opcode = OPCODE_MOV;
-
-       vpi->DstReg.File = PROGRAM_OUTPUT;
-       vpi->DstReg.Index = VERT_RESULT_HPOS;
-       vpi->DstReg.WriteMask = WRITEMASK_XYZW;
-       vpi->DstReg.CondMask = COND_TR;
-
-       vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
-       vpi->SrcReg[0].Index = temp_index;
-       vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+       ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
+                       sizeof(struct temporary_allocation) * num_orig_temps);
+       memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
 
-       ++vpi;
+       /* Pass 2: Determine original temporary lifetimes */
+       for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+               GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
 
-       vpi->Opcode = OPCODE_MOV;
+               for (i = 0; i < numsrcs; ++i) {
+                       if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY)
+                               ta[inst->I.SrcReg[i].Index].LastRead = inst;
+               }
+       }
 
-       vpi->DstReg.File = PROGRAM_OUTPUT;
-       vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
-       vpi->DstReg.WriteMask = WRITEMASK_XYZW;
-       vpi->DstReg.CondMask = COND_TR;
+       /* Pass 3: Register allocation */
+       for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+               GLuint numsrcs = _mesa_num_inst_src_regs(inst->I.Opcode);
+               GLuint numdsts = _mesa_num_inst_dst_regs(inst->I.Opcode);
 
-       vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
-       vpi->SrcReg[0].Index = temp_index;
-       vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+               for (i = 0; i < numsrcs; ++i) {
+                       if (inst->I.SrcReg[i].File == PROGRAM_TEMPORARY) {
+                               GLuint orig = inst->I.SrcReg[i].Index;
+                               inst->I.SrcReg[i].Index = ta[orig].HwTemp;
 
-       ++vpi;
+                               if (ta[orig].Allocated && inst == ta[orig].LastRead)
+                                       hwtemps[ta[orig].HwTemp] = GL_FALSE;
+                       }
+               }
 
-       vpi->Opcode = OPCODE_END;
-}
+               if (numdsts) {
+                       if (inst->I.DstReg.File == PROGRAM_TEMPORARY) {
+                               GLuint orig = inst->I.DstReg.Index;
 
-static void pos_as_texcoord(struct gl_program *prog, int tex_id)
-{
-       struct prog_instruction *vpi;
-       GLuint tempregi = prog->NumTemporaries;
-
-       prog->NumTemporaries++;
+                               if (!ta[orig].Allocated) {
+                                       for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) {
+                                               if (!hwtemps[j])
+                                                       break;
+                                       }
+                                       if (j >= VSF_MAX_FRAGMENT_TEMPS) {
+                                               fprintf(stderr, "Out of hw temporaries\n");
+                                       } else {
+                                               ta[orig].Allocated = GL_TRUE;
+                                               ta[orig].HwTemp = j;
+                                               hwtemps[j] = GL_TRUE;
+
+                                               if (j >= compiler->code->num_temporaries)
+                                                       compiler->code->num_temporaries = j + 1;
+                                       }
+                               }
 
-       for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
-               if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) {
-                       vpi->DstReg.File = PROGRAM_TEMPORARY;
-                       vpi->DstReg.Index = tempregi;
+                               inst->I.DstReg.Index = ta[orig].HwTemp;
+                       }
                }
        }
-
-       insert_wpos(prog, tempregi, tex_id);
-
-       prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
 }
 
+
 /**
- * The fogcoord attribute is special in that only the first component
- * is relevant, and the remaining components are always fixed (when read
- * from by the fragment program) to yield an X001 pattern.
- *
- * We need to enforce this either in the vertex program or in the fragment
- * program, and this code chooses not to enforce it in the vertex program.
- * This is slightly cheaper, as long as the fragment program does not use
- * weird swizzles.
- *
- * And it seems that usually, weird swizzles are not used, so...
- *
- * See also the counterpart rewriting for fragment programs.
+ * The vertex engine cannot read two inputs or two constants within a single instruction.
+ * Conflicting source operands are first copied into temporary registers by inserted MOVs.
  */
-static void fog_as_texcoord(struct gl_program *prog, int tex_id)
+static GLboolean transform_source_conflicts(
+       struct radeon_compiler *c,
+       struct rc_instruction* inst,
+       void* unused)
 {
-       struct prog_instruction *vpi;
+       GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode);
 
-       vpi = prog->Instructions;
-       while (vpi->Opcode != OPCODE_END) {
-               if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) {
-                       vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
-                       vpi->DstReg.WriteMask = WRITEMASK_X;
+       if (num_operands == 3) {
+               if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2])
+                   || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) {
+                       int tmpreg = rc_find_free_temporary(c);
+                       struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+                       inst_mov->I.Opcode = OPCODE_MOV;
+                       inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
+                       inst_mov->I.DstReg.Index = tmpreg;
+                       inst_mov->I.SrcReg[0] = inst->I.SrcReg[2];
+
+                       reset_srcreg(&inst->I.SrcReg[2]);
+                       inst->I.SrcReg[2].File = PROGRAM_TEMPORARY;
+                       inst->I.SrcReg[2].Index = tmpreg;
                }
+       }
 
-               ++vpi;
+       if (num_operands >= 2) {
+               if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) {
+                       int tmpreg = rc_find_free_temporary(c);
+                       struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+                       inst_mov->I.Opcode = OPCODE_MOV;
+                       inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
+                       inst_mov->I.DstReg.Index = tmpreg;
+                       inst_mov->I.SrcReg[0] = inst->I.SrcReg[1];
+
+                       reset_srcreg(&inst->I.SrcReg[1]);
+                       inst->I.SrcReg[1].File = PROGRAM_TEMPORARY;
+                       inst->I.SrcReg[1].Index = tmpreg;
+               }
        }
 
-       prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC);
-       prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
+       return GL_TRUE;
 }
 
-
-#define ADD_OUTPUT(fp_attr, vp_result) \
-       do { \
-               if ((FpReads & (1 << (fp_attr))) && !(compiler->program->OutputsWritten & (1 << (vp_result)))) { \
-                       OutputsAdded |= 1 << (vp_result); \
-                       count++; \
-               } \
-       } while (0)
-
 static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
 {
-       GLuint OutputsAdded, FpReads;
-       int i, count;
-
-       OutputsAdded = 0;
-       count = 0;
-       FpReads = compiler->state.FpReads;
-
-       ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
-       ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
-
-       for (i = 0; i < 7; ++i) {
-               ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
-       }
-
-       /* Some outputs may be artificially added, to match the inputs of the fragment program.
-        * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
-        * vertex program are undefined, so just use MOV [vertex_result], CONST[0]
-        */
-       if (count > 0) {
-               struct prog_instruction *inst;
-
-               _mesa_insert_instructions(compiler->program, compiler->program->NumInstructions - 1, count);
-               inst = &compiler->program->Instructions[compiler->program->NumInstructions - 1 - count];
+       int i;
 
-               for (i = 0; i < VERT_RESULT_MAX; ++i) {
-                       if (OutputsAdded & (1 << i)) {
-                               inst->Opcode = OPCODE_MOV;
+       for(i = 0; i < 32; ++i) {
+               if ((compiler->RequiredOutputs & (1 << i)) &&
+                   !(compiler->Base.Program.OutputsWritten & (1 << i))) {
+                       struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
+                       inst->I.Opcode = OPCODE_MOV;
 
-                               inst->DstReg.File = PROGRAM_OUTPUT;
-                               inst->DstReg.Index = i;
-                               inst->DstReg.WriteMask = WRITEMASK_XYZW;
-                               inst->DstReg.CondMask = COND_TR;
+                       inst->I.DstReg.File = PROGRAM_OUTPUT;
+                       inst->I.DstReg.Index = i;
+                       inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
 
-                               inst->SrcReg[0].File = PROGRAM_CONSTANT;
-                               inst->SrcReg[0].Index = 0;
-                               inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+                       inst->I.SrcReg[0].File = PROGRAM_CONSTANT;
+                       inst->I.SrcReg[0].Index = 0;
+                       inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
 
-                               ++inst;
-                       }
+                       compiler->Base.Program.OutputsWritten |= 1 << i;
                }
-
-               compiler->program->OutputsWritten |= OutputsAdded;
        }
 }
 
-#undef ADD_OUTPUT
-
 static void nqssadceInit(struct nqssadce_state* s)
 {
        struct r300_vertex_program_compiler * compiler = s->UserData;
-       GLuint fp_reads;
-
-       fp_reads = compiler->state.FpReads;
-       {
-               if (fp_reads & FRAG_BIT_COL0) {
-                               s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW;
-                               s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW;
-               }
+       int i;
 
-               if (fp_reads & FRAG_BIT_COL1) {
-                               s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW;
-                               s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW;
+       for(i = 0; i < VERT_RESULT_MAX; ++i) {
+               if (compiler->RequiredOutputs & (1 << i)) {
+                       if (i != VERT_RESULT_PSIZ)
+                               s->Outputs[i].Sourced = WRITEMASK_XYZW;
+                       else
+                               s->Outputs[i].Sourced = WRITEMASK_X; /* ugly hack: PSIZ is special-cased so that only its X component counts as sourced */
                }
        }
-
-       {
-               int i;
-               for (i = 0; i < 8; ++i) {
-                       if (fp_reads & FRAG_BIT_TEX(i)) {
-                               s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW;
-                       }
-               }
-       }
-
-       s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW;
-       if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ))
-               s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X;
 }
 
 static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
@@ -675,16 +626,21 @@ static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
 
 
 
-GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler, GLcontext * ctx)
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
 {
-       GLboolean success;
+       rc_mesa_to_rc_program(&compiler->Base, compiler->program);
+       compiler->program = 0;
 
        if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) {
-               pos_as_texcoord(compiler->program, compiler->state.WPosAttr - FRAG_ATTRIB_TEX0);
+               rc_copy_output(&compiler->Base,
+                       VERT_RESULT_HPOS,
+                       compiler->state.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0);
        }
 
        if (compiler->state.FogAttr != FRAG_ATTRIB_MAX) {
-               fog_as_texcoord(compiler->program, compiler->state.FogAttr - FRAG_ATTRIB_TEX0);
+               rc_move_output(&compiler->Base,
+                       VERT_RESULT_FOGC,
+                       compiler->state.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X);
        }
 
        addArtificialOutputs(compiler);
@@ -693,12 +649,12 @@ GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compi
                struct radeon_program_transformation transformations[] = {
                        { &r300_transform_vertex_alu, 0 },
                };
-               radeonLocalTransform(compiler->program, 1, transformations);
+               radeonLocalTransform(&compiler->Base, 1, transformations);
        }
 
        if (compiler->Base.Debug) {
                fprintf(stderr, "Vertex program after native rewrite:\n");
-               _mesa_print_program(compiler->program);
+               rc_print_program(&compiler->Base.Program);
                fflush(stdout);
        }
 
@@ -710,12 +666,12 @@ GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compi
                struct radeon_program_transformation transformations[] = {
                        { &transform_source_conflicts, 0 },
                };
-               radeonLocalTransform(compiler->program, 1, transformations);
+               radeonLocalTransform(&compiler->Base, 1, transformations);
        }
 
        if (compiler->Base.Debug) {
                fprintf(stderr, "Vertex program after source conflict resolve:\n");
-               _mesa_print_program(compiler->program);
+               rc_print_program(&compiler->Base.Program);
                fflush(stdout);
        }
 
@@ -725,56 +681,22 @@ GLboolean r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compi
                        .IsNativeSwizzle = &swizzleIsNative,
                        .BuildSwizzle = NULL
                };
-               radeonNqssaDce(compiler->program, &nqssadce, compiler);
+               radeonNqssaDce(&compiler->Base, &nqssadce, compiler);
 
                /* We need this step for reusing temporary registers */
-               _mesa_optimize_program(ctx, compiler->program);
+               allocate_temporary_registers(compiler);
 
                if (compiler->Base.Debug) {
                        fprintf(stderr, "Vertex program after NQSSADCE:\n");
-                       _mesa_print_program(compiler->program);
+                       rc_print_program(&compiler->Base.Program);
                        fflush(stdout);
                }
        }
 
-       assert(compiler->program->NumInstructions);
-       {
-               struct prog_instruction *inst;
-               int max, i, tmp;
-
-               inst = compiler->program->Instructions;
-               max = -1;
-               while (inst->Opcode != OPCODE_END) {
-                       tmp = _mesa_num_inst_src_regs(inst->Opcode);
-                       for (i = 0; i < tmp; ++i) {
-                               if (inst->SrcReg[i].File == PROGRAM_TEMPORARY) {
-                                       if ((int) inst->SrcReg[i].Index > max) {
-                                               max = inst->SrcReg[i].Index;
-                                       }
-                               }
-                       }
-
-                       if (_mesa_num_inst_dst_regs(inst->Opcode)) {
-                               if (inst->DstReg.File == PROGRAM_TEMPORARY) {
-                                       if ((int) inst->DstReg.Index > max) {
-                                               max = inst->DstReg.Index;
-                                       }
-                               }
-                       }
-                       ++inst;
-               }
-
-               /* We actually want highest index of used temporary register,
-                * not the number of temporaries used.
-                * These values aren't always the same.
-                */
-               compiler->code->num_temporaries = max + 1;
-       }
-
-       success = translate_vertex_program(compiler);
+       translate_vertex_program(compiler);
 
-       compiler->code->InputsRead = compiler->program->InputsRead;
-       compiler->code->OutputsWritten = compiler->program->OutputsWritten;
+       rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants);
 
-       return success;
+       compiler->code->InputsRead = compiler->Base.Program.InputsRead;
+       compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten;
 }