r300/vertprog: Refactor wpos rewrite using rc_program
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r3xx_vertprog.c
index 743fc205978fcd98826c67c75f441a2e350859d7..c05b488645b7520091fa8167a2196a9b14877ed2 100644 (file)
@@ -310,35 +310,35 @@ static void ei_pow(struct r300_vertex_program_code *vp,
        inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
 }
 
-static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_program * glvp)
+static void t_inputs_outputs(struct r300_vertex_program_compiler * c)
 {
        int i;
        int cur_reg;
        GLuint OutputsWritten, InputsRead;
 
-       OutputsWritten = glvp->OutputsWritten;
-       InputsRead = glvp->InputsRead;
+       OutputsWritten = c->Base.Program.OutputsWritten;
+       InputsRead = c->Base.Program.InputsRead;
 
        cur_reg = -1;
        for (i = 0; i < VERT_ATTRIB_MAX; i++) {
                if (InputsRead & (1 << i))
-                       vp->inputs[i] = ++cur_reg;
+                       c->code->inputs[i] = ++cur_reg;
                else
-                       vp->inputs[i] = -1;
+                       c->code->inputs[i] = -1;
        }
 
        cur_reg = 0;
        for (i = 0; i < VERT_RESULT_MAX; i++)
-               vp->outputs[i] = -1;
+               c->code->outputs[i] = -1;
 
        assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
 
        if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
-               vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
+               c->code->outputs[VERT_RESULT_HPOS] = cur_reg++;
        }
 
        if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
-               vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
+               c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++;
        }
 
        /* If we're writing back facing colors we need to send
@@ -348,39 +348,39 @@ static void t_inputs_outputs(struct r300_vertex_program_code *vp, struct gl_prog
         * get written into appropriate output vectors.
         */
        if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
-               vp->outputs[VERT_RESULT_COL0] = cur_reg++;
+               c->code->outputs[VERT_RESULT_COL0] = cur_reg++;
        } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
                OutputsWritten & (1 << VERT_RESULT_BFC1)) {
                cur_reg++;
        }
 
        if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
-               vp->outputs[VERT_RESULT_COL1] = cur_reg++;
+               c->code->outputs[VERT_RESULT_COL1] = cur_reg++;
        } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
                OutputsWritten & (1 << VERT_RESULT_BFC1)) {
                cur_reg++;
        }
 
        if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
-               vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
+               c->code->outputs[VERT_RESULT_BFC0] = cur_reg++;
        } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
                cur_reg++;
        }
 
        if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
-               vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
+               c->code->outputs[VERT_RESULT_BFC1] = cur_reg++;
        } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
                cur_reg++;
        }
 
        for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
                if (OutputsWritten & (1 << i)) {
-                       vp->outputs[i] = cur_reg++;
+                       c->code->outputs[i] = cur_reg++;
                }
        }
 
        if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
-               vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
+               c->code->outputs[VERT_RESULT_FOGC] = cur_reg++;
        }
 }
 
@@ -391,7 +391,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
        compiler->code->pos_end = 0;    /* Not supported yet */
        compiler->code->length = 0;
 
-       t_inputs_outputs(compiler->code, compiler->program);
+       t_inputs_outputs(compiler);
 
        for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
                struct prog_instruction *vpi = &rci->I;
@@ -538,226 +538,82 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
  * Introduce intermediate MOVs to temporary registers to account for this.
  */
 static GLboolean transform_source_conflicts(
-       struct radeon_transform_context *t,
-       struct prog_instruction* orig_inst,
+       struct radeon_compiler *c, /* compiler handle passed to the temporary/insert helpers */
+       struct rc_instruction* inst, /* instruction that is examined and rewritten in place */
        void* unused)
 {
-       struct prog_instruction inst = *orig_inst;
-       struct prog_instruction * dst;
-       GLuint num_operands = _mesa_num_inst_src_regs(inst.Opcode);
+       GLuint num_operands = _mesa_num_inst_src_regs(inst->I.Opcode); /* gates which SrcReg[] slots are checked below */
 
        if (num_operands == 3) {
-               if (t_src_conflict(inst.SrcReg[1], inst.SrcReg[2])
-                   || t_src_conflict(inst.SrcReg[0], inst.SrcReg[2])) {
-                       int tmpreg = radeonFindFreeTemporary(t);
-                       struct prog_instruction * inst_mov = radeonAppendInstructions(t->Program, 1);
-                       inst_mov->Opcode = OPCODE_MOV;
-                       inst_mov->DstReg.File = PROGRAM_TEMPORARY;
-                       inst_mov->DstReg.Index = tmpreg;
-                       inst_mov->SrcReg[0] = inst.SrcReg[2];
-
-                       reset_srcreg(&inst.SrcReg[2]);
-                       inst.SrcReg[2].File = PROGRAM_TEMPORARY;
-                       inst.SrcReg[2].Index = tmpreg;
+               if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[2]) /* operand 2 clashes with operand 1 ... */
+                   || t_src_conflict(inst->I.SrcReg[0], inst->I.SrcReg[2])) { /* ... or with operand 0 */
+                       int tmpreg = rc_find_free_temporary(c); /* temporary that will stand in for SrcReg[2] */
+                       struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); /* NOTE(review): presumably lands directly before inst - confirm */
+                       inst_mov->I.Opcode = OPCODE_MOV;
+                       inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
+                       inst_mov->I.DstReg.Index = tmpreg;
+                       inst_mov->I.SrcReg[0] = inst->I.SrcReg[2]; /* the MOV reads the original operand 2 unchanged */
+
+                       reset_srcreg(&inst->I.SrcReg[2]);
+                       inst->I.SrcReg[2].File = PROGRAM_TEMPORARY;
+                       inst->I.SrcReg[2].Index = tmpreg; /* inst now reads the temporary instead */
                }
        }
 
        if (num_operands >= 2) {
-               if (t_src_conflict(inst.SrcReg[1], inst.SrcReg[0])) {
-                       int tmpreg = radeonFindFreeTemporary(t);
-                       struct prog_instruction * inst_mov = radeonAppendInstructions(t->Program, 1);
-                       inst_mov->Opcode = OPCODE_MOV;
-                       inst_mov->DstReg.File = PROGRAM_TEMPORARY;
-                       inst_mov->DstReg.Index = tmpreg;
-                       inst_mov->SrcReg[0] = inst.SrcReg[1];
-
-                       reset_srcreg(&inst.SrcReg[1]);
-                       inst.SrcReg[1].File = PROGRAM_TEMPORARY;
-                       inst.SrcReg[1].Index = tmpreg;
+               if (t_src_conflict(inst->I.SrcReg[1], inst->I.SrcReg[0])) { /* checked after operand 2 may already have been redirected */
+                       int tmpreg = rc_find_free_temporary(c); /* temporary that will stand in for SrcReg[1] */
+                       struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev); /* NOTE(review): presumably lands directly before inst - confirm */
+                       inst_mov->I.Opcode = OPCODE_MOV;
+                       inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
+                       inst_mov->I.DstReg.Index = tmpreg;
+                       inst_mov->I.SrcReg[0] = inst->I.SrcReg[1]; /* the MOV reads the original operand 1 unchanged */
+
+                       reset_srcreg(&inst->I.SrcReg[1]);
+                       inst->I.SrcReg[1].File = PROGRAM_TEMPORARY;
+                       inst->I.SrcReg[1].Index = tmpreg; /* inst now reads the temporary instead */
                }
        }
 
-       dst = radeonAppendInstructions(t->Program, 1);
-       *dst = inst;
        return GL_TRUE;
 }
 
-static void insert_wpos(struct gl_program *prog, GLuint temp_index, int tex_id)
-{
-       struct prog_instruction *vpi;
-
-       _mesa_insert_instructions(prog, prog->NumInstructions - 1, 2);
-
-       vpi = &prog->Instructions[prog->NumInstructions - 3];
-
-       vpi->Opcode = OPCODE_MOV;
-
-       vpi->DstReg.File = PROGRAM_OUTPUT;
-       vpi->DstReg.Index = VERT_RESULT_HPOS;
-       vpi->DstReg.WriteMask = WRITEMASK_XYZW;
-       vpi->DstReg.CondMask = COND_TR;
-
-       vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
-       vpi->SrcReg[0].Index = temp_index;
-       vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
-       ++vpi;
-
-       vpi->Opcode = OPCODE_MOV;
-
-       vpi->DstReg.File = PROGRAM_OUTPUT;
-       vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
-       vpi->DstReg.WriteMask = WRITEMASK_XYZW;
-       vpi->DstReg.CondMask = COND_TR;
-
-       vpi->SrcReg[0].File = PROGRAM_TEMPORARY;
-       vpi->SrcReg[0].Index = temp_index;
-       vpi->SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
-       ++vpi;
-
-       vpi->Opcode = OPCODE_END;
-}
-
-static void pos_as_texcoord(struct gl_program *prog, int tex_id)
-{
-       struct prog_instruction *vpi;
-       GLuint tempregi = prog->NumTemporaries;
-
-       prog->NumTemporaries++;
-
-       for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
-               if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_HPOS) {
-                       vpi->DstReg.File = PROGRAM_TEMPORARY;
-                       vpi->DstReg.Index = tempregi;
-               }
-       }
-
-       insert_wpos(prog, tempregi, tex_id);
-
-       prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
-}
-
-/**
- * The fogcoord attribute is special in that only the first component
- * is relevant, and the remaining components are always fixed (when read
- * from by the fragment program) to yield an X001 pattern.
- *
- * We need to enforce this either in the vertex program or in the fragment
- * program, and this code chooses not to enforce it in the vertex program.
- * This is slightly cheaper, as long as the fragment program does not use
- * weird swizzles.
- *
- * And it seems that usually, weird swizzles are not used, so...
- *
- * See also the counterpart rewriting for fragment programs.
- */
-static void fog_as_texcoord(struct gl_program *prog, int tex_id)
-{
-       struct prog_instruction *vpi;
-
-       vpi = prog->Instructions;
-       while (vpi->Opcode != OPCODE_END) {
-               if (vpi->DstReg.File == PROGRAM_OUTPUT && vpi->DstReg.Index == VERT_RESULT_FOGC) {
-                       vpi->DstReg.Index = VERT_RESULT_TEX0 + tex_id;
-                       vpi->DstReg.WriteMask = WRITEMASK_X;
-               }
-
-               ++vpi;
-       }
-
-       prog->OutputsWritten &= ~(1 << VERT_RESULT_FOGC);
-       prog->OutputsWritten |= 1 << (VERT_RESULT_TEX0 + tex_id);
-}
-
-
-#define ADD_OUTPUT(fp_attr, vp_result) \
-       do { \
-               if ((FpReads & (1 << (fp_attr))) && !(compiler->program->OutputsWritten & (1 << (vp_result)))) { \
-                       OutputsAdded |= 1 << (vp_result); \
-                       count++; \
-               } \
-       } while (0)
-
 static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
 {
-       GLuint OutputsAdded, FpReads;
-       int i, count;
-
-       OutputsAdded = 0;
-       count = 0;
-       FpReads = compiler->state.FpReads;
-
-       ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
-       ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
-
-       for (i = 0; i < 7; ++i) {
-               ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
-       }
-
-       /* Some outputs may be artificially added, to match the inputs of the fragment program.
-        * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
-        * vertex program are undefined, so just use MOV [vertex_result], CONST[0]
-        */
-       if (count > 0) {
-               struct prog_instruction *inst;
-
-               _mesa_insert_instructions(compiler->program, compiler->program->NumInstructions - 1, count);
-               inst = &compiler->program->Instructions[compiler->program->NumInstructions - 1 - count];
+       int i;
 
-               for (i = 0; i < VERT_RESULT_MAX; ++i) {
-                       if (OutputsAdded & (1 << i)) {
-                               inst->Opcode = OPCODE_MOV;
+       for(i = 0; i < 32; ++i) {
+               if ((compiler->RequiredOutputs & (1 << i)) &&
+                   !(compiler->Base.Program.OutputsWritten & (1 << i))) {
+                       struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
+                       inst->I.Opcode = OPCODE_MOV;
 
-                               inst->DstReg.File = PROGRAM_OUTPUT;
-                               inst->DstReg.Index = i;
-                               inst->DstReg.WriteMask = WRITEMASK_XYZW;
-                               inst->DstReg.CondMask = COND_TR;
+                       inst->I.DstReg.File = PROGRAM_OUTPUT;
+                       inst->I.DstReg.Index = i;
+                       inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
 
-                               inst->SrcReg[0].File = PROGRAM_CONSTANT;
-                               inst->SrcReg[0].Index = 0;
-                               inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+                       inst->I.SrcReg[0].File = PROGRAM_CONSTANT;
+                       inst->I.SrcReg[0].Index = 0;
+                       inst->I.SrcReg[0].Swizzle = SWIZZLE_XYZW;
 
-                               ++inst;
-                       }
+                       compiler->Base.Program.OutputsWritten |= 1 << i;
                }
-
-               compiler->program->OutputsWritten |= OutputsAdded;
        }
 }
 
-#undef ADD_OUTPUT
-
 static void nqssadceInit(struct nqssadce_state* s)
 {
        struct r300_vertex_program_compiler * compiler = s->UserData;
-       GLuint fp_reads;
-
-       fp_reads = compiler->state.FpReads;
-       {
-               if (fp_reads & FRAG_BIT_COL0) {
-                               s->Outputs[VERT_RESULT_COL0].Sourced = WRITEMASK_XYZW;
-                               s->Outputs[VERT_RESULT_BFC0].Sourced = WRITEMASK_XYZW;
-               }
-
-               if (fp_reads & FRAG_BIT_COL1) {
-                               s->Outputs[VERT_RESULT_COL1].Sourced = WRITEMASK_XYZW;
-                               s->Outputs[VERT_RESULT_BFC1].Sourced = WRITEMASK_XYZW;
-               }
-       }
+       int i;
 
-       {
-               int i;
-               for (i = 0; i < 8; ++i) {
-                       if (fp_reads & FRAG_BIT_TEX(i)) {
-                               s->Outputs[VERT_RESULT_TEX0 + i].Sourced = WRITEMASK_XYZW;
-                       }
+       for(i = 0; i < VERT_RESULT_MAX; ++i) {
+               if (compiler->RequiredOutputs & (1 << i)) {
+                       if (i != VERT_RESULT_PSIZ)
+                               s->Outputs[i].Sourced = WRITEMASK_XYZW;
+                       else
+                               s->Outputs[i].Sourced = WRITEMASK_X; /* ugly hack! */
                }
        }
-
-       s->Outputs[VERT_RESULT_HPOS].Sourced = WRITEMASK_XYZW;
-       if (s->Program->OutputsWritten & (1 << VERT_RESULT_PSIZ))
-               s->Outputs[VERT_RESULT_PSIZ].Sourced = WRITEMASK_X;
 }
 
 static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
@@ -772,12 +628,19 @@ static GLboolean swizzleIsNative(GLuint opcode, struct prog_src_register reg)
 
 void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
 {
+       rc_mesa_to_rc_program(&compiler->Base, compiler->program);
+       compiler->program = 0;
+
        if (compiler->state.WPosAttr != FRAG_ATTRIB_MAX) {
-               pos_as_texcoord(compiler->program, compiler->state.WPosAttr - FRAG_ATTRIB_TEX0);
+               rc_copy_output(&compiler->Base,
+                       VERT_RESULT_HPOS,
+                       compiler->state.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0);
        }
 
        if (compiler->state.FogAttr != FRAG_ATTRIB_MAX) {
-               fog_as_texcoord(compiler->program, compiler->state.FogAttr - FRAG_ATTRIB_TEX0);
+               rc_move_output(&compiler->Base,
+                       VERT_RESULT_FOGC,
+                       compiler->state.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X);
        }
 
        addArtificialOutputs(compiler);
@@ -786,12 +649,12 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
                struct radeon_program_transformation transformations[] = {
                        { &r300_transform_vertex_alu, 0 },
                };
-               radeonLocalTransform(compiler->program, 1, transformations);
+               radeonLocalTransform(&compiler->Base, 1, transformations);
        }
 
        if (compiler->Base.Debug) {
                fprintf(stderr, "Vertex program after native rewrite:\n");
-               _mesa_print_program(compiler->program);
+               rc_print_program(&compiler->Base.Program);
                fflush(stdout);
        }
 
@@ -803,12 +666,12 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
                struct radeon_program_transformation transformations[] = {
                        { &transform_source_conflicts, 0 },
                };
-               radeonLocalTransform(compiler->program, 1, transformations);
+               radeonLocalTransform(&compiler->Base, 1, transformations);
        }
 
        if (compiler->Base.Debug) {
                fprintf(stderr, "Vertex program after source conflict resolve:\n");
-               _mesa_print_program(compiler->program);
+               rc_print_program(&compiler->Base.Program);
                fflush(stdout);
        }
 
@@ -818,9 +681,7 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
                        .IsNativeSwizzle = &swizzleIsNative,
                        .BuildSwizzle = NULL
                };
-               radeonNqssaDce(compiler->program, &nqssadce, compiler);
-
-               rc_mesa_to_rc_program(&compiler->Base, compiler->program);
+               radeonNqssaDce(&compiler->Base, &nqssadce, compiler);
 
                /* We need this step for reusing temporary registers */
                allocate_temporary_registers(compiler);
@@ -834,6 +695,8 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
 
        translate_vertex_program(compiler);
 
-       compiler->code->InputsRead = compiler->program->InputsRead;
-       compiler->code->OutputsWritten = compiler->program->OutputsWritten;
+       rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants);
+
+       compiler->code->InputsRead = compiler->Base.Program.InputsRead;
+       compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten;
 }