Merge remote branch 'origin/mesa_7_7_branch'
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
index 91d9d8ae949140f6c999d6dca5b5783c699d9ed8..c2f96af2c1215de84e1a635c4e7793df2c5f254b 100644 (file)
@@ -41,64 +41,194 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "tnl/tnl.h"
 
 #include "compiler/radeon_compiler.h"
-#include "compiler/radeon_nqssadce.h"
+#include "radeon_mesa_to_rc.h"
 #include "r300_context.h"
+#include "r300_fragprog_common.h"
 #include "r300_state.h"
 
-
-static int r300VertexProgUpdateParams(GLcontext * ctx, struct gl_vertex_program *vp, float *dst)
+/**
+ * Write the constant array of the given vertex program into dst, four floats per entry of vp->code.constants.
+ * Return the total number of components written, i.e. 4 * vp->code.constants.Count.
+ */
+static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program *vp, float *dst)
 {
-       int pi;
-       float *dst_o = dst;
-       struct gl_program_parameter_list *paramList;
+       int i;
 
-       if (vp->IsNVProgram) {
+       if (vp->Base->IsNVProgram) {
                _mesa_load_tracked_matrices(ctx);
+       } else {
+               if (vp->Base->Base.Parameters) { /* nothing to load when the program has no parameter list */
+                       _mesa_load_state_parameters(ctx, vp->Base->Base.Parameters);
+               }
+       }
 
-               for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
-                       *dst++ = ctx->VertexProgram.Parameters[pi][0];
-                       *dst++ = ctx->VertexProgram.Parameters[pi][1];
-                       *dst++ = ctx->VertexProgram.Parameters[pi][2];
-                       *dst++ = ctx->VertexProgram.Parameters[pi][3];
+       for(i = 0; i < vp->code.constants.Count; ++i) {
+               const float * src = 0; /* NOTE(review): stays NULL if Type matches no case below, yet is read at dst[4*i] - confirm no other constant type can occur */
+               const struct rc_constant * constant = &vp->code.constants.Constants[i];
+
+               switch(constant->Type) {
+               case RC_CONSTANT_EXTERNAL: /* value comes from a parameter array indexed by u.External */
+                       if (vp->Base->IsNVProgram) { /* NV programs read the context-wide parameter array */
+                               src = ctx->VertexProgram.Parameters[constant->u.External];
+                       } else {
+                               src = vp->Base->Base.Parameters->ParameterValues[constant->u.External]; /* NOTE(review): Parameters is not NULL-checked here, unlike above - confirm */
+                       }
+                       break;
+
+               case RC_CONSTANT_IMMEDIATE: /* value is stored inline in the constant itself */
+                       src = constant->u.Immediate;
+                       break;
                }
-               return dst - dst_o;
+
+               dst[4*i] = src[0]; /* constant i occupies the four consecutive floats dst[4*i .. 4*i+3] */
+               dst[4*i + 1] = src[1];
+               dst[4*i + 2] = src[2];
+               dst[4*i + 3] = src[3];
        }
 
-       if (!vp->Base.Parameters)
-               return 0;
+       return 4 * vp->code.constants.Count;
+}
+/* Return the mask of VERT_RESULT_* outputs required from vp, given fpreads, a mask of FRAG_ATTRIB_* bits (build_program passes the key's FpReads). */
+static GLbitfield compute_required_outputs(struct gl_vertex_program * vp, GLbitfield fpreads)
+{
+       GLbitfield outputs = 0; /* VERT_RESULT_* bits accumulated for the return value */
+       int i;
 
-       _mesa_load_state_parameters(ctx, vp->Base.Parameters);
+#define ADD_OUTPUT(fp_attr, vp_result) \
+       do { \
+               if (fpreads & (1 << (fp_attr))) \
+                       outputs |= (1 << (vp_result)); \
+       } while (0)
 
-       if (vp->Base.Parameters->NumParameters * 4 >
-           VSF_MAX_FRAGMENT_LENGTH) {
-               fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
-               _mesa_exit(-1);
+       ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0); /* each fragment attribute that is read requires the matching vertex result */
+       ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
+
+       for (i = 0; i <= 7; ++i) { /* eight texture coordinate sets, mapped one-to-one by offset */
+               ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
        }
 
-       paramList = vp->Base.Parameters;
-       for (pi = 0; pi < paramList->NumParameters; pi++) {
-               switch (paramList->Parameters[pi].Type) {
-               case PROGRAM_STATE_VAR:
-               case PROGRAM_NAMED_PARAM:
-                       //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
-               case PROGRAM_CONSTANT:
-                       *dst++ = paramList->ParameterValues[pi][0];
-                       *dst++ = paramList->ParameterValues[pi][1];
-                       *dst++ = paramList->ParameterValues[pi][2];
-                       *dst++ = paramList->ParameterValues[pi][3];
-                       break;
-               default:
-                       _mesa_problem(NULL, "Bad param type in %s",
-                                     __FUNCTION__);
+#undef ADD_OUTPUT
+       /* A back-face color is required only when the matching front color is read AND the vertex program writes it. */
+       if ((fpreads & (1 << FRAG_ATTRIB_COL0)) &&
+           (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC0)))
+               outputs |= 1 << VERT_RESULT_BFC0;
+       if ((fpreads & (1 << FRAG_ATTRIB_COL1)) &&
+           (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC1)))
+               outputs |= 1 << VERT_RESULT_BFC1;
+       /* Position is always required; point size only when the vertex program writes it. */
+       outputs |= 1 << VERT_RESULT_HPOS;
+       if (vp->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ))
+               outputs |= 1 << VERT_RESULT_PSIZ;
+
+       return outputs;
+}
+
+/* Installed by build_program as the compiler's SetHwInputOutput callback: fills c->code->inputs[] and c->code->outputs[] with register indices (-1 = unused). */
+static void t_inputs_outputs(struct r300_vertex_program_compiler * c)
+{
+       int i;
+       int cur_reg; /* running register index, reused for inputs and then for outputs */
+       GLuint OutputsWritten, InputsRead;
+
+       OutputsWritten = c->Base.Program.OutputsWritten;
+       InputsRead = c->Base.Program.InputsRead;
+
+       cur_reg = -1; /* the pre-increment below gives the first input that is read index 0 */
+       for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+               if (InputsRead & (1 << i))
+                       c->code->inputs[i] = ++cur_reg;
+               else
+                       c->code->inputs[i] = -1;
+       }
+
+       cur_reg = 0; /* output indices are handed out in the fixed order of the tests below */
+       for (i = 0; i < VERT_RESULT_MAX; i++)
+               c->code->outputs[i] = -1;
+
+       assert(OutputsWritten & (1 << VERT_RESULT_HPOS)); /* NOTE(review): makes the HPOS test just below redundant whenever asserts are enabled */
+
+       if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
+               c->code->outputs[VERT_RESULT_HPOS] = cur_reg++;
+       }
+
+       if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
+               c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++;
+       }
+
+       /* If we're writing back-facing colors we need to send
+        * four colors to make front/back face color selection work.
+        * If the vertex program doesn't write all 4 colors, let's
+        * pretend it does by skipping an output register index so the
+        * colors get written into the appropriate output vectors.
+        */
+       if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
+               c->code->outputs[VERT_RESULT_COL0] = cur_reg++;
+       } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+               OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+               cur_reg++; /* reserve the COL0 slot although COL0 itself is not written */
+       }
+
+       if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
+               c->code->outputs[VERT_RESULT_COL1] = cur_reg++;
+       } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+               OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+               cur_reg++; /* reserve the COL1 slot although COL1 itself is not written */
+       }
+
+       if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+               c->code->outputs[VERT_RESULT_BFC0] = cur_reg++;
+       } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+               cur_reg++; /* reserve the BFC0 slot when only BFC1 is written */
+       }
+
+       if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+               c->code->outputs[VERT_RESULT_BFC1] = cur_reg++;
+       } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+               cur_reg++; /* reserve the BFC1 slot when only BFC0 is written */
+       }
+
+       for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { /* written texcoords are packed, with no gaps for unwritten ones */
+               if (OutputsWritten & (1 << i)) {
+                       c->code->outputs[i] = cur_reg++;
                }
+       }
 
+       if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { /* fog, when written, gets the last index */
+               c->code->outputs[VERT_RESULT_FOGC] = cur_reg++;
        }
+}
 
-       return dst - dst_o;
+/**
+ * The NV_vertex_program spec mandates that all registers be
+ * initialized to zero. We do this here unconditionally by inserting one
+ * MOV into each of temporaries 0..11 and one ARL into address register 0.
+ * \note We rely on dead-code elimination in the compiler.
+ */
+static void initialize_NV_registers(struct radeon_compiler * compiler)
+{
+       unsigned int reg;
+       struct rc_instruction * inst;
+
+       for(reg = 0; reg < 12; ++reg) { /* 12 is presumably the NV_vertex_program temporary count - TODO confirm */
+               inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
+               inst->U.I.Opcode = RC_OPCODE_MOV;
+               inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+               inst->U.I.DstReg.Index = reg;
+               inst->U.I.SrcReg[0].File = RC_FILE_NONE; /* no source register: together with swizzle 0000 this is the zero being moved */
+               inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+       }
+
+       inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
+       inst->U.I.Opcode = RC_OPCODE_ARL;
+       inst->U.I.DstReg.File = RC_FILE_ADDRESS;
+       inst->U.I.DstReg.Index = 0;
+       inst->U.I.DstReg.WriteMask = WRITEMASK_X; /* only the X component of the address register is written */
+       inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+       inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
 }
 
 static struct r300_vertex_program *build_program(GLcontext *ctx,
-                                                struct r300_vertex_program_external_state *wanted_key,
+                                                struct r300_vertex_program_key *wanted_key,
                                                 const struct gl_vertex_program *mesa_vp)
 {
        struct r300_vertex_program *vp;
@@ -109,25 +239,52 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
        _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
 
        rc_init(&compiler.Base);
-       compiler.Base.Debug = (RADEON_DEBUG & DEBUG_VERTS) ? GL_TRUE : GL_FALSE;
+       compiler.Base.Debug = (RADEON_DEBUG & RADEON_VERTS) ? GL_TRUE : GL_FALSE;
 
        compiler.code = &vp->code;
-       compiler.state = vp->key;
-       compiler.program = vp->Base;
+       compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads);
+       compiler.SetHwInputOutput = &t_inputs_outputs;
 
        if (compiler.Base.Debug) {
                fprintf(stderr, "Initial vertex program:\n");
-               _mesa_print_program(compiler.program);
-               fflush(stdout);
+               _mesa_print_program(&vp->Base->Base);
+               fflush(stderr);
        }
 
        if (mesa_vp->IsPositionInvariant) {
-               _mesa_insert_mvp_code(ctx, (struct gl_vertex_program *)compiler.program);
+               _mesa_insert_mvp_code(ctx, vp->Base);
+       }
+
+       radeon_mesa_to_rc_program(&compiler.Base, &vp->Base->Base);
+
+       if (mesa_vp->IsNVProgram)
+               initialize_NV_registers(&compiler.Base);
+
+       rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X);
+
+       if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) {
+               rc_copy_output(&compiler.Base,
+                       VERT_RESULT_HPOS,
+                       vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0);
+       }
+
+       if (vp->key.FogAttr != FRAG_ATTRIB_MAX) {
+               rc_move_output(&compiler.Base,
+                       VERT_RESULT_FOGC,
+                       vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X);
        }
 
        r3xx_compile_vertex_program(&compiler);
+
+       if (vp->code.constants.Count > ctx->Const.VertexProgram.MaxParameters) {
+               rc_error(&compiler.Base, "Program exceeds constant buffer size limit\n");
+       }
+
        vp->error = compiler.Base.Error;
 
+       vp->Base->Base.InputsRead = vp->code.InputsRead;
+       vp->Base->Base.OutputsWritten = vp->code.OutputsWritten;
+
        rc_destroy(&compiler.Base);
 
        return vp;
@@ -136,14 +293,28 @@ static struct r300_vertex_program *build_program(GLcontext *ctx,
 struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx)
 {
        r300ContextPtr r300 = R300_CONTEXT(ctx);
-       struct r300_vertex_program_external_state wanted_key = { 0 };
+       struct r300_vertex_program_key wanted_key = { 0 };
        struct r300_vertex_program_cont *vpc;
        struct r300_vertex_program *vp;
 
        vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
+
+       if (!r300->selected_fp) {
+               /* This can happen when GetProgramiv is called to check
+                * whether the program runs natively.
+                *
+                * To be honest, this is not a very good solution,
+                * but solving the problem of reporting good values
+                * for those queries is tough anyway considering that
+                * we recompile vertex programs based on the precise
+                * fragment program that is in use.
+                */
+               r300SelectAndTranslateFragmentShader(ctx);
+       }
+
        wanted_key.FpReads = r300->selected_fp->InputsRead;
-       wanted_key.FogAttr = r300->selected_fp->code.fog_attr;
-       wanted_key.WPosAttr = r300->selected_fp->code.wpos_attr;
+       wanted_key.FogAttr = r300->selected_fp->fog_attr;
+       wanted_key.WPosAttr = r300->selected_fp->wpos_attr;
 
        for (vp = vpc->progs; vp; vp = vp->next) {
                if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
@@ -162,7 +333,6 @@ struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx)
 #define bump_vpu_count(ptr, new_count)   do { \
                drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr)); \
                int _nc=(new_count)/4; \
-               assert(_nc < 256); \
                if(_nc>_p->vpu.count)_p->vpu.count=_nc; \
        } while(0)
 
@@ -172,6 +342,8 @@ static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_ver
 
        assert((code->length > 0) && (code->length % 4 == 0));
 
+       R300_STATECHANGE( r300, vap_flush );
+
        switch ((dest >> 8) & 0xf) {
                case 0:
                        R300_STATECHANGE(r300, vpi);
@@ -209,8 +381,9 @@ void r300SetupVertexProgram(r300ContextPtr rmesa)
        ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
        ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
 
+       R300_STATECHANGE(rmesa, vap_flush);
        R300_STATECHANGE(rmesa, vpp);
-       param_count = r300VertexProgUpdateParams(ctx, prog->Base, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
+       param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
        bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
        param_count /= 4;