Make sure we use only signed/unsigned ints with bitfields.
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
index b9f6d28357ee4e7da371c39ae7410487b5c67eb6..aa98a049aa4aad5d64ab00c5997263a3200c6f25 100644 (file)
@@ -1,6 +1,7 @@
 /**************************************************************************
 
-Copyright (C) 2005 Aapo Tahkola.
+Copyright (C) 2005  Aapo Tahkola <aet@rasterburn.org>
+Copyright (C) 2008  Oliver McFadden <z3ro.geek@gmail.com>
 
 All Rights Reserved.
 
@@ -25,1419 +26,377 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 **************************************************************************/
 
-/**
- * \file
- *
- * \author Aapo Tahkola <aet@rasterburn.org>
- *
- * \author Oliver McFadden <z3ro.geek@gmail.com>
- *
- * \todo A VE_MULTIPLY_ADD or VE_MULTIPLYX2_ADD opcode with all 3 source
- * operands using unique PVS_REG_TEMPORARY vector addresses requires special
- * handling, which is currently not implemented!
- *
- * For a description of the vertex program instruction set see r300_reg.h.
- */
+/* Radeon R5xx Acceleration, Revision 1.2 */
 
 #include "main/glheader.h"
 #include "main/macros.h"
 #include "main/enums.h"
 #include "shader/program.h"
+#include "shader/programopt.h"
 #include "shader/prog_instruction.h"
+#include "shader/prog_optimize.h"
 #include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
 #include "shader/prog_statevars.h"
 #include "tnl/tnl.h"
 
+#include "compiler/radeon_compiler.h"
+#include "radeon_mesa_to_rc.h"
 #include "r300_context.h"
+#include "r300_fragprog_common.h"
+#include "r300_state.h"
 
-/* TODO: Get rid of t_src_class call */
-#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \
-                      ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \
-                        t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \
-                       (t_src_class(a.File) == PVS_SRC_REG_INPUT && \
-                        t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \
-
-#define ZERO_SRC_0 (PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]), \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4))
-
-#define ZERO_SRC_1 (PVS_SOURCE_OPCODE(t_src_index(vp, &src[1]), \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4))
-
-#define ZERO_SRC_2 (PVS_SOURCE_OPCODE(t_src_index(vp, &src[2]), \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4))
-
-#define ONE_SRC_0 (PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]), \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4))
-
-#define ONE_SRC_1 (PVS_SOURCE_OPCODE(t_src_index(vp, &src[1]), \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4))
-
-#define ONE_SRC_2 (PVS_SOURCE_OPCODE(t_src_index(vp, &src[2]), \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4))
-
-#define FREE_TEMPS() \
-       do { \
-               int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \
-               if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \
-                       WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \
-                       vp->native = GL_FALSE; \
-               } \
-               u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
-       } while (0)
-
-int r300VertexProgUpdateParams(GLcontext * ctx,
-                              struct r300_vertex_program_cont *vp,
-                              float *dst)
+/**
+ * Write parameter array for the given vertex program into dst.
+ * Return the total number of components written.
+ */
+static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program *vp, float *dst)
 {
-       int pi;
-       struct gl_vertex_program *mesa_vp = &vp->mesa_program;
-       float *dst_o = dst;
-       struct gl_program_parameter_list *paramList;
+       int i;
 
-       if (mesa_vp->IsNVProgram) {
+       if (vp->Base->IsNVProgram) {
                _mesa_load_tracked_matrices(ctx);
-
-               for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
-                       *dst++ = ctx->VertexProgram.Parameters[pi][0];
-                       *dst++ = ctx->VertexProgram.Parameters[pi][1];
-                       *dst++ = ctx->VertexProgram.Parameters[pi][2];
-                       *dst++ = ctx->VertexProgram.Parameters[pi][3];
+       } else {
+               if (vp->Base->Base.Parameters) {
+                       _mesa_load_state_parameters(ctx, vp->Base->Base.Parameters);
                }
-               return dst - dst_o;
        }
 
-       assert(mesa_vp->Base.Parameters);
-       _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
+       for(i = 0; i < vp->code.constants.Count; ++i) {
+               const float * src = 0;
+               const struct rc_constant * constant = &vp->code.constants.Constants[i];
 
-       if (mesa_vp->Base.Parameters->NumParameters * 4 >
-           VSF_MAX_FRAGMENT_LENGTH) {
-               fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
-               _mesa_exit(-1);
-       }
-
-       paramList = mesa_vp->Base.Parameters;
-       for (pi = 0; pi < paramList->NumParameters; pi++) {
-               switch (paramList->Parameters[pi].Type) {
-
-               case PROGRAM_STATE_VAR:
-               case PROGRAM_NAMED_PARAM:
-                       //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
-               case PROGRAM_CONSTANT:
-                       *dst++ = paramList->ParameterValues[pi][0];
-                       *dst++ = paramList->ParameterValues[pi][1];
-                       *dst++ = paramList->ParameterValues[pi][2];
-                       *dst++ = paramList->ParameterValues[pi][3];
+               switch(constant->Type) {
+               case RC_CONSTANT_EXTERNAL:
+                       if (vp->Base->IsNVProgram) {
+                               src = ctx->VertexProgram.Parameters[constant->u.External];
+                       } else {
+                               src = vp->Base->Base.Parameters->ParameterValues[constant->u.External];
+                       }
                        break;
 
-               default:
-                       _mesa_problem(NULL, "Bad param type in %s",
-                                     __FUNCTION__);
+               case RC_CONSTANT_IMMEDIATE:
+                       src = constant->u.Immediate;
+                       break;
                }
 
+               dst[4*i] = src[0];
+               dst[4*i + 1] = src[1];
+               dst[4*i + 2] = src[2];
+               dst[4*i + 3] = src[3];
        }
 
-       return dst - dst_o;
-}
-
-static unsigned long t_dst_mask(GLuint mask)
-{
-       /* WRITEMASK_* is equivalent to VSF_FLAG_* */
-       return mask & VSF_FLAG_ALL;
-}
-
-static unsigned long t_dst_class(enum register_file file)
-{
-
-       switch (file) {
-       case PROGRAM_TEMPORARY:
-               return PVS_DST_REG_TEMPORARY;
-       case PROGRAM_OUTPUT:
-               return PVS_DST_REG_OUT;
-       case PROGRAM_ADDRESS:
-               return PVS_DST_REG_A0;
-               /*
-                  case PROGRAM_INPUT:
-                  case PROGRAM_LOCAL_PARAM:
-                  case PROGRAM_ENV_PARAM:
-                  case PROGRAM_NAMED_PARAM:
-                  case PROGRAM_STATE_VAR:
-                  case PROGRAM_WRITE_ONLY:
-                  case PROGRAM_ADDRESS:
-                */
-       default:
-               fprintf(stderr, "problem in %s", __FUNCTION__);
-               _mesa_exit(-1);
-               return -1;
-       }
-}
-
-static unsigned long t_dst_index(struct r300_vertex_program *vp,
-                                struct prog_dst_register *dst)
-{
-       if (dst->File == PROGRAM_OUTPUT)
-               return vp->outputs[dst->Index];
-
-       return dst->Index;
-}
-
-static unsigned long t_src_class(enum register_file file)
-{
-
-       switch (file) {
-       case PROGRAM_TEMPORARY:
-               return PVS_SRC_REG_TEMPORARY;
-
-       case PROGRAM_INPUT:
-               return PVS_SRC_REG_INPUT;
-
-       case PROGRAM_LOCAL_PARAM:
-       case PROGRAM_ENV_PARAM:
-       case PROGRAM_NAMED_PARAM:
-       case PROGRAM_STATE_VAR:
-               return PVS_SRC_REG_CONSTANT;
-               /*
-                  case PROGRAM_OUTPUT:
-                  case PROGRAM_WRITE_ONLY:
-                  case PROGRAM_ADDRESS:
-                */
-       default:
-               fprintf(stderr, "problem in %s", __FUNCTION__);
-               _mesa_exit(-1);
-               return -1;
-       }
-}
-
-static inline unsigned long t_swizzle(GLubyte swizzle)
-{
-/* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
-       return swizzle;
-}
-
-#if 0
-static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
-{
-       int i;
-
-       if (vp == NULL) {
-               fprintf(stderr, "vp null in call to %s from %s\n",
-                       __FUNCTION__, caller);
-               return;
-       }
-
-       fprintf(stderr, "%s:<", caller);
-       for (i = 0; i < VERT_ATTRIB_MAX; i++)
-               fprintf(stderr, "%d ", vp->inputs[i]);
-       fprintf(stderr, ">\n");
-
+       return 4 * vp->code.constants.Count;
 }
-#endif
 
-static unsigned long t_src_index(struct r300_vertex_program *vp,
-                                struct prog_src_register *src)
+static GLbitfield compute_required_outputs(struct gl_vertex_program * vp, GLbitfield fpreads)
 {
+       GLbitfield outputs = 0;
        int i;
-       int max_reg = -1;
 
-       if (src->File == PROGRAM_INPUT) {
-               if (vp->inputs[src->Index] != -1)
-                       return vp->inputs[src->Index];
-
-               for (i = 0; i < VERT_ATTRIB_MAX; i++)
-                       if (vp->inputs[i] > max_reg)
-                               max_reg = vp->inputs[i];
-
-               vp->inputs[src->Index] = max_reg + 1;
+#define ADD_OUTPUT(fp_attr, vp_result) \
+       do { \
+               if (fpreads & (1 << (fp_attr))) \
+                       outputs |= (1 << (vp_result)); \
+       } while (0)
 
-               //vp_dump_inputs(vp, __FUNCTION__);
+       ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
+       ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
 
-               return vp->inputs[src->Index];
-       } else {
-               if (src->Index < 0) {
-                       fprintf(stderr,
-                               "negative offsets for indirect addressing do not work.\n");
-                       return 0;
-               }
-               return src->Index;
+       for (i = 0; i <= 7; ++i) {
+               ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
        }
-}
 
-/* these two functions should probably be merged... */
+#undef ADD_OUTPUT
 
-static unsigned long t_src(struct r300_vertex_program *vp,
-                          struct prog_src_register *src)
-{
-       /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
-        * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
-        */
-       return PVS_SOURCE_OPCODE(t_src_index(vp, src),
-                              t_swizzle(GET_SWZ(src->Swizzle, 0)),
-                              t_swizzle(GET_SWZ(src->Swizzle, 1)),
-                              t_swizzle(GET_SWZ(src->Swizzle, 2)),
-                              t_swizzle(GET_SWZ(src->Swizzle, 3)),
-                              t_src_class(src->File),
-                              src->NegateBase) | (src->RelAddr << 4);
-}
+       if ((fpreads & (1 << FRAG_ATTRIB_COL0)) &&
+           (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC0)))
+               outputs |= 1 << VERT_RESULT_BFC0;
+       if ((fpreads & (1 << FRAG_ATTRIB_COL1)) &&
+           (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC1)))
+               outputs |= 1 << VERT_RESULT_BFC1;
 
-static unsigned long t_src_scalar(struct r300_vertex_program *vp,
-                                 struct prog_src_register *src)
-{
-       /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
-        * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
-        */
-       return PVS_SOURCE_OPCODE(t_src_index(vp, src),
-                              t_swizzle(GET_SWZ(src->Swizzle, 0)),
-                              t_swizzle(GET_SWZ(src->Swizzle, 0)),
-                              t_swizzle(GET_SWZ(src->Swizzle, 0)),
-                              t_swizzle(GET_SWZ(src->Swizzle, 0)),
-                              t_src_class(src->File),
-                              src->
-                              NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
-           (src->RelAddr << 4);
-}
+       outputs |= 1 << VERT_RESULT_HPOS;
+       if (vp->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ))
+               outputs |= 1 << VERT_RESULT_PSIZ;
 
-static GLboolean valid_dst(struct r300_vertex_program *vp,
-                          struct prog_dst_register *dst)
-{
-       if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
-               return GL_FALSE;
-       } else if (dst->File == PROGRAM_ADDRESS) {
-               assert(dst->Index == 0);
-       }
-
-       return GL_TRUE;
+       return outputs;
 }
 
-/*
- * Instruction    Inputs  Output   Description
- * -----------    ------  ------   --------------------------------
- * ABS            v       v        absolute value
- * ADD            v,v     v        add
- * ARL            s       a        address register load
- * DP3            v,v     ssss     3-component dot product
- * DP4            v,v     ssss     4-component dot product
- * DPH            v,v     ssss     homogeneous dot product
- * DST            v,v     v        distance vector
- * EX2            s       ssss     exponential base 2
- * EXP            s       v        exponential base 2 (approximate)
- * FLR            v       v        floor
- * FRC            v       v        fraction
- * LG2            s       ssss     logarithm base 2
- * LIT            v       v        compute light coefficients
- * LOG            s       v        logarithm base 2 (approximate)
- * MAD            v,v,v   v        multiply and add
- * MAX            v,v     v        maximum
- * MIN            v,v     v        minimum
- * MOV            v       v        move
- * MUL            v,v     v        multiply
- * POW            s,s     ssss     exponentiate
- * RCP            s       ssss     reciprocal
- * RSQ            s       ssss     reciprocal square root
- * SGE            v,v     v        set on greater than or equal
- * SLT            v,v     v        set on less than
- * SUB            v,v     v        subtract
- * SWZ            v       v        extended swizzle
- * XPD            v,v     v        cross product
- *
- * Table X.5:  Summary of vertex program instructions.  "v" indicates a
- * floating-point vector input or output, "s" indicates a floating-point
- * scalar input, "ssss" indicates a scalar output replicated across a
- * 4-component result vector, and "a" indicates a single address register
- * component.
- */
-
-static GLuint *t_opcode_abs(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
-
-       inst[0] =
-           PVS_VECTOR_OPCODE(VE_MAXIMUM, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = t_src(vp, &src[0]);
-       inst[2] =
-           PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
-                           t_src_class(src[0].File),
-                           (!src[0].
-                            NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
-           (src[0].RelAddr << 4);
-       inst[3] = 0;
-
-       return inst;
-}
 
-static GLuint *t_opcode_add(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       inst[0] =
-           PVS_VECTOR_OPCODE(VE_ADD, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = t_src(vp, &src[0]);
-       inst[2] = t_src(vp, &src[1]);
-       inst[3] = ZERO_SRC_1;
-
-       return inst;
-}
-
-static GLuint *t_opcode_arl(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       inst[0] =
-           PVS_VECTOR_OPCODE(VE_FLT2FIX_DX, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = t_src(vp, &src[0]);
-       inst[2] = ZERO_SRC_0;
-       inst[3] = ZERO_SRC_0;
-
-       return inst;
-}
-
-static GLuint *t_opcode_dp3(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
-
-       inst[0] =
-           PVS_VECTOR_OPCODE(VE_DOT_PRODUCT, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] =
-           PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
-                           SWIZZLE_ZERO, t_src_class(src[0].File),
-                           src[0].
-                           NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
-           (src[0].RelAddr << 4);
-       inst[2] =
-           PVS_SOURCE_OPCODE(t_src_index(vp, &src[1]),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
-                           SWIZZLE_ZERO, t_src_class(src[1].File),
-                           src[1].
-                           NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
-           (src[1].RelAddr << 4);
-       inst[3] = ZERO_SRC_1;
-
-       return inst;
-}
-
-static GLuint *t_opcode_dp4(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       inst[0] =
-           PVS_VECTOR_OPCODE(VE_DOT_PRODUCT, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-
-       inst[1] = t_src(vp, &src[0]);
-       inst[2] = t_src(vp, &src[1]);
-       inst[3] = ZERO_SRC_1;
-
-       return inst;
-}
-
-static GLuint *t_opcode_dph(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
-       inst[0] =
-           PVS_VECTOR_OPCODE(VE_DOT_PRODUCT, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] =
-           PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
-                           PVS_SRC_SELECT_FORCE_1, t_src_class(src[0].File),
-                           src[0].
-                           NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
-           (src[0].RelAddr << 4);
-       inst[2] = t_src(vp, &src[1]);
-       inst[3] = ZERO_SRC_1;
-
-       return inst;
-}
-
-static GLuint *t_opcode_dst(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       inst[0] =
-           PVS_VECTOR_OPCODE(VE_DISTANCE_VECTOR, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = t_src(vp, &src[0]);
-       inst[2] = t_src(vp, &src[1]);
-       inst[3] = ZERO_SRC_1;
-
-       return inst;
-}
-
-static GLuint *t_opcode_ex2(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       inst[0] =
-           PVS_MATH_OPCODE(ME_EXP_BASE2_FULL_DX, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = t_src_scalar(vp, &src[0]);
-       inst[2] = ZERO_SRC_0;
-       inst[3] = ZERO_SRC_0;
-
-       return inst;
-}
-
-static GLuint *t_opcode_exp(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       inst[0] =
-           PVS_MATH_OPCODE(ME_EXP_BASE2_DX, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = t_src_scalar(vp, &src[0]);
-       inst[2] = ZERO_SRC_0;
-       inst[3] = ZERO_SRC_0;
-
-       return inst;
-}
-
-static GLuint *t_opcode_flr(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3], int *u_temp_i)
-{
-       /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
-          ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
-
-       inst[0] =
-           PVS_VECTOR_OPCODE(VE_FRACTION, *u_temp_i,
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       PVS_DST_REG_TEMPORARY);
-       inst[1] = t_src(vp, &src[0]);
-       inst[2] = ZERO_SRC_0;
-       inst[3] = ZERO_SRC_0;
-       inst += 4;
-
-       inst[0] =
-           PVS_VECTOR_OPCODE(VE_ADD, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = t_src(vp, &src[0]);
-       inst[2] =
-           PVS_SOURCE_OPCODE(*u_temp_i, PVS_SRC_SELECT_X,
-                           PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z,
-                           PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY,
-                           /* Not 100% sure about this */
-                           (!src[0].
-                            NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE
-                           /*VSF_FLAG_ALL */ );
-       inst[3] = ZERO_SRC_0;
-       (*u_temp_i)--;
-
-       return inst;
-}
-
-static GLuint *t_opcode_frc(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       inst[0] =
-           PVS_VECTOR_OPCODE(VE_FRACTION, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = t_src(vp, &src[0]);
-       inst[2] = ZERO_SRC_0;
-       inst[3] = ZERO_SRC_0;
-
-       return inst;
-}
-
-static GLuint *t_opcode_lg2(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
-
-       inst[0] =
-           PVS_MATH_OPCODE(ME_LOG_BASE2_FULL_DX, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] =
-           PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                           t_src_class(src[0].File),
-                           src[0].
-                           NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
-           (src[0].RelAddr << 4);
-       inst[2] = ZERO_SRC_0;
-       inst[3] = ZERO_SRC_0;
-
-       return inst;
-}
-
-static GLuint *t_opcode_lit(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
-
-       inst[0] =
-           PVS_MATH_OPCODE(ME_LIGHT_COEFF_DX, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       /* NOTE: Users swizzling might not work. */
-       inst[1] = PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)),    // x
-                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),        // w
-                                 PVS_SRC_SELECT_FORCE_0,       // z
-                                 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),        // y
-                                 t_src_class(src[0].File),
-                                 src[0].
-                                 NegateBase ? VSF_FLAG_ALL :
-                                 VSF_FLAG_NONE) | (src[0].RelAddr << 4);
-       inst[2] = PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)),    // y
-                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),        // w
-                                 PVS_SRC_SELECT_FORCE_0,       // z
-                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),        // x
-                                 t_src_class(src[0].File),
-                                 src[0].
-                                 NegateBase ? VSF_FLAG_ALL :
-                                 VSF_FLAG_NONE) | (src[0].RelAddr << 4);
-       inst[3] = PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)),    // y
-                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),        // x
-                                 PVS_SRC_SELECT_FORCE_0,       // z
-                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),        // w
-                                 t_src_class(src[0].File),
-                                 src[0].
-                                 NegateBase ? VSF_FLAG_ALL :
-                                 VSF_FLAG_NONE) | (src[0].RelAddr << 4);
-
-       return inst;
-}
-
-static GLuint *t_opcode_log(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       inst[0] =
-           PVS_MATH_OPCODE(ME_LOG_BASE2_DX, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = t_src_scalar(vp, &src[0]);
-       inst[2] = ZERO_SRC_0;
-       inst[3] = ZERO_SRC_0;
-
-       return inst;
-}
-
-static GLuint *t_opcode_mad(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       inst[0] =
-           PVS_VECTOR_OPCODE(VE_MULTIPLY_ADD, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = t_src(vp, &src[0]);
-       inst[2] = t_src(vp, &src[1]);
-       inst[3] = t_src(vp, &src[2]);
-
-       return inst;
-}
-
-static GLuint *t_opcode_max(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       inst[0] =
-           PVS_VECTOR_OPCODE(VE_MAXIMUM, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = t_src(vp, &src[0]);
-       inst[2] = t_src(vp, &src[1]);
-       inst[3] = ZERO_SRC_1;
-
-       return inst;
-}
-
-static GLuint *t_opcode_min(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       inst[0] =
-           PVS_VECTOR_OPCODE(VE_MINIMUM, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = t_src(vp, &src[0]);
-       inst[2] = t_src(vp, &src[1]);
-       inst[3] = ZERO_SRC_1;
-
-       return inst;
-}
-
-static GLuint *t_opcode_mov(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
-
-       inst[0] =
-           PVS_VECTOR_OPCODE(VE_ADD, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = t_src(vp, &src[0]);
-       inst[2] = ZERO_SRC_0;
-       inst[3] = ZERO_SRC_0;
-
-       return inst;
-}
-
-static GLuint *t_opcode_mul(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       inst[0] =
-           PVS_VECTOR_OPCODE(VE_MULTIPLY, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = t_src(vp, &src[0]);
-       inst[2] = t_src(vp, &src[1]);
-       inst[3] = ZERO_SRC_1;
-
-       return inst;
-}
-
-static GLuint *t_opcode_pow(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       inst[0] =
-           PVS_MATH_OPCODE(ME_POWER_FUNC_FF, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = t_src_scalar(vp, &src[0]);
-       inst[2] = ZERO_SRC_0;
-       inst[3] = t_src_scalar(vp, &src[1]);
-
-       return inst;
-}
-
-static GLuint *t_opcode_rcp(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       inst[0] =
-           PVS_MATH_OPCODE(ME_RECIP_DX, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = t_src_scalar(vp, &src[0]);
-       inst[2] = ZERO_SRC_0;
-       inst[3] = ZERO_SRC_0;
-
-       return inst;
-}
-
-static GLuint *t_opcode_rsq(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       inst[0] =
-           PVS_MATH_OPCODE(ME_RECIP_SQRT_DX, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = t_src_scalar(vp, &src[0]);
-       inst[2] = ZERO_SRC_0;
-       inst[3] = ZERO_SRC_0;
-
-       return inst;
-}
-
-static GLuint *t_opcode_sge(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       inst[0] =
-           PVS_VECTOR_OPCODE(VE_SET_GREATER_THAN_EQUAL, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = t_src(vp, &src[0]);
-       inst[2] = t_src(vp, &src[1]);
-       inst[3] = ZERO_SRC_1;
-
-       return inst;
-}
-
-static GLuint *t_opcode_slt(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       inst[0] =
-           PVS_VECTOR_OPCODE(VE_SET_LESS_THAN, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = t_src(vp, &src[0]);
-       inst[2] = t_src(vp, &src[1]);
-       inst[3] = ZERO_SRC_1;
-
-       return inst;
-}
-
-static GLuint *t_opcode_sub(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
-
-       inst[0] =
-           PVS_VECTOR_OPCODE(VE_ADD, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = t_src(vp, &src[0]);
-       inst[2] =
-           PVS_SOURCE_OPCODE(t_src_index(vp, &src[1]),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
-                           t_src_class(src[1].File),
-                           (!src[1].
-                            NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
-           (src[1].RelAddr << 4);
-       inst[3] = 0;
-
-       return inst;
-}
-
-static GLuint *t_opcode_swz(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3])
-{
-       //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
-
-       inst[0] =
-           PVS_VECTOR_OPCODE(VE_ADD, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = t_src(vp, &src[0]);
-       inst[2] = ZERO_SRC_0;
-       inst[3] = ZERO_SRC_0;
-
-       return inst;
-}
-
-static GLuint *t_opcode_xpd(struct r300_vertex_program *vp,
-                           struct prog_instruction *vpi, GLuint * inst,
-                           struct prog_src_register src[3], int *u_temp_i)
-{
-       /* mul r0, r1.yzxw, r2.zxyw
-          mad r0, -r2.yzxw, r1.zxyw, r0
-        */
-
-       inst[0] =
-           PVS_VECTOR_OPCODE(VE_MULTIPLY_ADD, *u_temp_i,
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       PVS_DST_REG_TEMPORARY);
-       inst[1] = PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)),    // y
-                                 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),        // z
-                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),        // x
-                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),        // w
-                                 t_src_class(src[0].File),
-                                 src[0].
-                                 NegateBase ? VSF_FLAG_ALL :
-                                 VSF_FLAG_NONE) | (src[0].RelAddr << 4);
-       inst[2] = PVS_SOURCE_OPCODE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)),    // z
-                                 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),        // x
-                                 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),        // y
-                                 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),        // w
-                                 t_src_class(src[1].File),
-                                 src[1].
-                                 NegateBase ? VSF_FLAG_ALL :
-                                 VSF_FLAG_NONE) | (src[1].RelAddr << 4);
-       inst[3] = ZERO_SRC_1;
-       inst += 4;
-
-       inst[0] =
-           PVS_VECTOR_OPCODE(VE_MULTIPLY_ADD, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       inst[1] = PVS_SOURCE_OPCODE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)),    // y
-                                 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),        // z
-                                 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),        // x
-                                 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),        // w
-                                 t_src_class(src[1].File),
-                                 (!src[1].
-                                  NegateBase) ? VSF_FLAG_ALL :
-                                 VSF_FLAG_NONE) | (src[1].RelAddr << 4);
-       inst[2] = PVS_SOURCE_OPCODE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)),    // z
-                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),        // x
-                                 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),        // y
-                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),        // w
-                                 t_src_class(src[0].File),
-                                 src[0].
-                                 NegateBase ? VSF_FLAG_ALL :
-                                 VSF_FLAG_NONE) | (src[0].RelAddr << 4);
-       inst[3] =
-           PVS_SOURCE_OPCODE(*u_temp_i, PVS_SRC_SELECT_X,
-                           PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z,
-                           PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY,
-                           VSF_FLAG_NONE);
-
-       (*u_temp_i)--;
-
-       return inst;
-}
-
-static void t_inputs_outputs(struct r300_vertex_program *vp)
+static void t_inputs_outputs(struct r300_vertex_program_compiler * c) /* fills c->code->inputs[] and c->code->outputs[] with register indices; -1 marks an unused slot */
 {
        int i;
-       int cur_reg = 0;
+       int cur_reg;
+       GLuint OutputsWritten, InputsRead;
 
-       for (i = 0; i < VERT_ATTRIB_MAX; i++)
-               vp->inputs[i] = -1;
+       OutputsWritten = c->Base.Program.OutputsWritten;
+       InputsRead = c->Base.Program.InputsRead;
 
+       cur_reg = -1; /* pre-incremented below, so the first input that is read gets index 0 */
+       for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+               if (InputsRead & (1 << i))
+                       c->code->inputs[i] = ++cur_reg;
+               else
+                       c->code->inputs[i] = -1;
+       }
+
+       cur_reg = 0; /* outputs are numbered separately from inputs, starting at 0 */
        for (i = 0; i < VERT_RESULT_MAX; i++)
-               vp->outputs[i] = -1;
+               c->code->outputs[i] = -1;
 
-       assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
+       assert(OutputsWritten & (1 << VERT_RESULT_HPOS)); /* the position output is required */
 
-       if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) {
-               vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
+       if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
+               c->code->outputs[VERT_RESULT_HPOS] = cur_reg++;
        }
 
-       if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
-               vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
+       if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
+               c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++;
        }
 
-       if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) {
-               vp->outputs[VERT_RESULT_COL0] = cur_reg++;
+       /* If we're writing back-facing colors we need to send
+        * four colors to make front/back face color selection work.
+        * If the vertex program doesn't write all 4 colors, let's
+        * pretend it does by skipping an output register index so the
+        * colors get written into the appropriate output vectors.
+        */
+       if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
+               c->code->outputs[VERT_RESULT_COL0] = cur_reg++;
+       } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+               OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+               cur_reg++; /* COL0 not written: leave its index unused */
        }
 
-       if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) {
-               vp->outputs[VERT_RESULT_COL1] =
-                   vp->outputs[VERT_RESULT_COL0] + 1;
-               cur_reg = vp->outputs[VERT_RESULT_COL1] + 1;
+       if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
+               c->code->outputs[VERT_RESULT_COL1] = cur_reg++;
+       } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+               OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+               cur_reg++; /* COL1 not written: leave its index unused */
        }
 
-       if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
-               vp->outputs[VERT_RESULT_BFC0] =
-                   vp->outputs[VERT_RESULT_COL0] + 2;
-               cur_reg = vp->outputs[VERT_RESULT_BFC0] + 2;
+       if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+               c->code->outputs[VERT_RESULT_BFC0] = cur_reg++;
+       } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+               cur_reg++; /* only BFC1 written: leave the BFC0 index unused */
        }
 
-       if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
-               vp->outputs[VERT_RESULT_BFC1] =
-                   vp->outputs[VERT_RESULT_COL0] + 3;
-               cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1;
+       if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+               c->code->outputs[VERT_RESULT_BFC1] = cur_reg++;
+       } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+               cur_reg++; /* only BFC0 written: leave the BFC1 index unused */
        }
-#if 0
-       if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) {
-               vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
-       }
-#endif
 
        for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
-               if (vp->key.OutputsWritten & (1 << i)) {
-                       vp->outputs[i] = cur_reg++;
+               if (OutputsWritten & (1 << i)) {
+                       c->code->outputs[i] = cur_reg++;
                }
        }
-}
-
-static void r300TranslateVertexShader(struct r300_vertex_program *vp,
-                                     struct prog_instruction *vpi)
-{
-       int i;
-       GLuint *inst;
-       unsigned long num_operands;
-       /* Initial value should be last tmp reg that hw supports.
-          Strangely enough r300 doesnt mind even though these would be out of range.
-          Smart enough to realize that it doesnt need it? */
-       int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
-       struct prog_src_register src[3];
-
-       vp->pos_end = 0;        /* Not supported yet */
-       vp->program.length = 0;
-       /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */
-       vp->translated = GL_TRUE;
-       vp->native = GL_TRUE;
-
-       t_inputs_outputs(vp);
-
-       for (inst = vp->program.body.i; vpi->Opcode != OPCODE_END;
-            vpi++, inst += 4) {
-
-               FREE_TEMPS();
-
-               if (!valid_dst(vp, &vpi->DstReg)) {
-                       /* redirect result to unused temp */
-                       vpi->DstReg.File = PROGRAM_TEMPORARY;
-                       vpi->DstReg.Index = u_temp_i;
-               }
 
-               num_operands = _mesa_num_inst_src_regs(vpi->Opcode);
-
-               /* copy the sources (src) from mesa into a local variable... is this needed? */
-               for (i = 0; i < num_operands; i++) {
-                       src[i] = vpi->SrcReg[i];
-               }
-
-               if (num_operands == 3) {        /* TODO: scalars */
-                       if (CMP_SRCS(src[1], src[2])
-                           || CMP_SRCS(src[0], src[2])) {
-                               inst[0] =
-                                   PVS_VECTOR_OPCODE(VE_ADD,
-                                               u_temp_i, VSF_FLAG_ALL,
-                                               PVS_DST_REG_TEMPORARY);
-                               inst[1] =
-                                   PVS_SOURCE_OPCODE(t_src_index
-                                                   (vp, &src[2]),
-                                                   SWIZZLE_X, SWIZZLE_Y,
-                                                   SWIZZLE_Z, SWIZZLE_W,
-                                                   t_src_class(src[2].
-                                                               File),
-                                                   VSF_FLAG_NONE) |
-                                   (src[2].RelAddr << 4);
-                               inst[2] = ZERO_SRC_2;
-                               inst[3] = ZERO_SRC_2;
-                               inst += 4;
-
-                               src[2].File = PROGRAM_TEMPORARY;
-                               src[2].Index = u_temp_i;
-                               src[2].RelAddr = 0;
-                               u_temp_i--;
-                       }
-               }
-
-               if (num_operands >= 2) {
-                       if (CMP_SRCS(src[1], src[0])) {
-                               inst[0] =
-                                   PVS_VECTOR_OPCODE(VE_ADD,
-                                               u_temp_i, VSF_FLAG_ALL,
-                                               PVS_DST_REG_TEMPORARY);
-                               inst[1] =
-                                   PVS_SOURCE_OPCODE(t_src_index
-                                                   (vp, &src[0]),
-                                                   SWIZZLE_X, SWIZZLE_Y,
-                                                   SWIZZLE_Z, SWIZZLE_W,
-                                                   t_src_class(src[0].
-                                                               File),
-                                                   VSF_FLAG_NONE) |
-                                   (src[0].RelAddr << 4);
-                               inst[2] = ZERO_SRC_0;
-                               inst[3] = ZERO_SRC_0;
-                               inst += 4;
-
-                               src[0].File = PROGRAM_TEMPORARY;
-                               src[0].Index = u_temp_i;
-                               src[0].RelAddr = 0;
-                               u_temp_i--;
-                       }
-               }
-
-               switch (vpi->Opcode) {
-               case OPCODE_ABS:
-                       inst = t_opcode_abs(vp, vpi, inst, src);
-                       break;
-               case OPCODE_ADD:
-                       inst = t_opcode_add(vp, vpi, inst, src);
-                       break;
-               case OPCODE_ARL:
-                       inst = t_opcode_arl(vp, vpi, inst, src);
-                       break;
-               case OPCODE_DP3:
-                       inst = t_opcode_dp3(vp, vpi, inst, src);
-                       break;
-               case OPCODE_DP4:
-                       inst = t_opcode_dp4(vp, vpi, inst, src);
-                       break;
-               case OPCODE_DPH:
-                       inst = t_opcode_dph(vp, vpi, inst, src);
-                       break;
-               case OPCODE_DST:
-                       inst = t_opcode_dst(vp, vpi, inst, src);
-                       break;
-               case OPCODE_EX2:
-                       inst = t_opcode_ex2(vp, vpi, inst, src);
-                       break;
-               case OPCODE_EXP:
-                       inst = t_opcode_exp(vp, vpi, inst, src);
-                       break;
-               case OPCODE_FLR:
-                       inst =
-                           t_opcode_flr(vp, vpi, inst, src, /* FIXME */
-                                        &u_temp_i);
-                       break;
-               case OPCODE_FRC:
-                       inst = t_opcode_frc(vp, vpi, inst, src);
-                       break;
-               case OPCODE_LG2:
-                       inst = t_opcode_lg2(vp, vpi, inst, src);
-                       break;
-               case OPCODE_LIT:
-                       inst = t_opcode_lit(vp, vpi, inst, src);
-                       break;
-               case OPCODE_LOG:
-                       inst = t_opcode_log(vp, vpi, inst, src);
-                       break;
-               case OPCODE_MAD:
-                       inst = t_opcode_mad(vp, vpi, inst, src);
-                       break;
-               case OPCODE_MAX:
-                       inst = t_opcode_max(vp, vpi, inst, src);
-                       break;
-               case OPCODE_MIN:
-                       inst = t_opcode_min(vp, vpi, inst, src);
-                       break;
-               case OPCODE_MOV:
-                       inst = t_opcode_mov(vp, vpi, inst, src);
-                       break;
-               case OPCODE_MUL:
-                       inst = t_opcode_mul(vp, vpi, inst, src);
-                       break;
-               case OPCODE_POW:
-                       inst = t_opcode_pow(vp, vpi, inst, src);
-                       break;
-               case OPCODE_RCP:
-                       inst = t_opcode_rcp(vp, vpi, inst, src);
-                       break;
-               case OPCODE_RSQ:
-                       inst = t_opcode_rsq(vp, vpi, inst, src);
-                       break;
-               case OPCODE_SGE:
-                       inst = t_opcode_sge(vp, vpi, inst, src);
-                       break;
-               case OPCODE_SLT:
-                       inst = t_opcode_slt(vp, vpi, inst, src);
-                       break;
-               case OPCODE_SUB:
-                       inst = t_opcode_sub(vp, vpi, inst, src);
-                       break;
-               case OPCODE_SWZ:
-                       inst = t_opcode_swz(vp, vpi, inst, src);
-                       break;
-               case OPCODE_XPD:
-                       inst =
-                           t_opcode_xpd(vp, vpi, inst, src, /* FIXME */
-                                        &u_temp_i);
-                       break;
-               default:
-                       assert(0);
-                       break;
-               }
+       if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { /* fog is numbered last, after the texcoords */
+               c->code->outputs[VERT_RESULT_FOGC] = cur_reg++;
        }
+}
 
-       /* Some outputs may be artificially added, to match the inputs
-          of the fragment program. Blank the outputs here. */
-       for (i = 0; i < VERT_RESULT_MAX; i++) {
-               if (vp->key.OutputsAdded & (1 << i)) {
-                       inst[0] = PVS_VECTOR_OPCODE(VE_ADD, vp->outputs[i],
-                                                   VSF_FLAG_ALL,
-                                                   PVS_DST_REG_OUT);
-                       inst[1] = ZERO_SRC_0;
-                       inst[2] = ZERO_SRC_0;
-                       inst[3] = ZERO_SRC_0;
-                       inst += 4;
-               }
+/**
+ * The NV_vertex_program spec mandates that all registers be initialized to
+ * zero: emit MOVs to temporaries 0..11 and an ARL to address register 0.x.
+ *
+ * \note Done unconditionally; we rely on the compiler's dead-code elimination.
+ */
+static void initialize_NV_registers(struct radeon_compiler * compiler)
+{
+       unsigned int reg;
+       struct rc_instruction * inst;
+
+       for(reg = 0; reg < 12; ++reg) { /* 12: presumably the NV_vertex_program temporary count -- TODO confirm */
+               inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions); /* presumably inserts at the head of the program -- confirm */
+               inst->U.I.Opcode = RC_OPCODE_MOV;
+               inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+               inst->U.I.DstReg.Index = reg;
+               inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+               inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; /* source supplies the zero value in every channel */
        }
 
-       vp->program.length = (inst - vp->program.body.i);
-       if (vp->program.length >= VSF_MAX_FRAGMENT_LENGTH) {
-               vp->program.length = 0;
-               vp->native = GL_FALSE;
-       }
-#if 0
-       fprintf(stderr, "hw program:\n");
-       for (i = 0; i < vp->program.length; i++)
-               fprintf(stderr, "%08x\n", vp->program.body.d[i]);
-#endif
+       inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
+       inst->U.I.Opcode = RC_OPCODE_ARL;
+       inst->U.I.DstReg.File = RC_FILE_ADDRESS;
+       inst->U.I.DstReg.Index = 0;
+       inst->U.I.DstReg.WriteMask = WRITEMASK_X; /* only the .x component of the address register is written */
+       inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+       inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
 }
 
-/* DP4 version seems to trigger some hw peculiarity */
-//#define PREFER_DP4
-
-static void position_invariant(struct gl_program *prog)
+static struct r300_vertex_program *build_program(GLcontext *ctx,
+                                                struct r300_vertex_program_key *wanted_key,
+                                                const struct gl_vertex_program *mesa_vp) /* compiles a newly allocated variant of mesa_vp for wanted_key and returns it; errors are reported in vp->error */
 {
-       struct prog_instruction *vpi;
-       struct gl_program_parameter_list *paramList;
-       int i;
-
-       gl_state_index tokens[STATE_LENGTH] =
-           { STATE_MVP_MATRIX, 0, 0, 0, 0 };
-
-       /* tokens[4] = matrix modifier */
-#ifdef PREFER_DP4
-       tokens[4] = 0;          /* not transposed or inverted */
-#else
-       tokens[4] = STATE_MATRIX_TRANSPOSE;
-#endif
-       paramList = prog->Parameters;
-
-       vpi = _mesa_alloc_instructions(prog->NumInstructions + 4);
-       _mesa_init_instructions(vpi, prog->NumInstructions + 4);
-
-       for (i = 0; i < 4; i++) {
-               GLint idx;
-               tokens[2] = tokens[3] = i;      /* matrix row[i]..row[i] */
-               idx = _mesa_add_state_reference(paramList, tokens);
-#ifdef PREFER_DP4
-               vpi[i].Opcode = OPCODE_DP4;
-               vpi[i].StringPos = 0;
-               vpi[i].Data = 0;
-
-               vpi[i].DstReg.File = PROGRAM_OUTPUT;
-               vpi[i].DstReg.Index = VERT_RESULT_HPOS;
-               vpi[i].DstReg.WriteMask = 1 << i;
-               vpi[i].DstReg.CondMask = COND_TR;
-
-               vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
-               vpi[i].SrcReg[0].Index = idx;
-               vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
-               vpi[i].SrcReg[1].File = PROGRAM_INPUT;
-               vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
-               vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW;
-#else
-               if (i == 0)
-                       vpi[i].Opcode = OPCODE_MUL;
-               else
-                       vpi[i].Opcode = OPCODE_MAD;
-
-               vpi[i].StringPos = 0;
-               vpi[i].Data = 0;
-
-               if (i == 3)
-                       vpi[i].DstReg.File = PROGRAM_OUTPUT;
-               else
-                       vpi[i].DstReg.File = PROGRAM_TEMPORARY;
-               vpi[i].DstReg.Index = 0;
-               vpi[i].DstReg.WriteMask = 0xf;
-               vpi[i].DstReg.CondMask = COND_TR;
-
-               vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
-               vpi[i].SrcReg[0].Index = idx;
-               vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
-
-               vpi[i].SrcReg[1].File = PROGRAM_INPUT;
-               vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
-               vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
-
-               if (i > 0) {
-                       vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
-                       vpi[i].SrcReg[2].Index = 0;
-                       vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW;
-               }
-#endif
-       }
-
-       _mesa_copy_instructions(&vpi[i], prog->Instructions,
-                               prog->NumInstructions);
-
-       free(prog->Instructions);
+       struct r300_vertex_program *vp;
+       struct r300_vertex_program_compiler compiler;
 
-       prog->Instructions = vpi;
+       vp = _mesa_calloc(sizeof(*vp)); /* NOTE(review): dereferenced on the next line without a NULL check */
+       vp->Base = (struct gl_vertex_program *) _mesa_clone_program(ctx, &mesa_vp->Base); /* all later changes go to this clone; mesa_vp is const */
+       _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key)); /* byte copy; the caller later compares keys with memcmp */
 
-       prog->NumInstructions += 4;
-       vpi = &prog->Instructions[prog->NumInstructions - 1];
+       rc_init(&compiler.Base);
+       compiler.Base.Debug = (RADEON_DEBUG & RADEON_VERTS) ? GL_TRUE : GL_FALSE; /* debug output is controlled by the RADEON_VERTS flag */
 
-       assert(vpi->Opcode == OPCODE_END);
-}
+       compiler.code = &vp->code; /* compiler output lands directly in the variant */
+       compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads);
+       compiler.SetHwInputOutput = &t_inputs_outputs; /* callback; presumably invoked by r3xx_compile_vertex_program -- confirm */
 
-static void insert_wpos(struct r300_vertex_program *vp,
-                       struct gl_program *prog, GLuint temp_index)
-{
-       struct prog_instruction *vpi;
-       struct prog_instruction *vpi_insert;
-       int i = 0;
+       if (compiler.Base.Debug) {
+               fprintf(stderr, "Initial vertex program:\n");
+               _mesa_print_program(&vp->Base->Base);
+               fflush(stderr);
+       }
 
-       vpi = _mesa_alloc_instructions(prog->NumInstructions + 2);
-       _mesa_init_instructions(vpi, prog->NumInstructions + 2);
-       /* all but END */
-       _mesa_copy_instructions(vpi, prog->Instructions,
-                               prog->NumInstructions - 1);
-       /* END */
-       _mesa_copy_instructions(&vpi[prog->NumInstructions + 1],
-                               &prog->Instructions[prog->NumInstructions -
-                                                   1], 1);
-       vpi_insert = &vpi[prog->NumInstructions - 1];
+       if (mesa_vp->IsPositionInvariant) {
+               _mesa_insert_mvp_code(ctx, vp->Base); /* applied to the clone, not to mesa_vp */
+       }
 
-       vpi_insert[i].Opcode = OPCODE_MOV;
+       radeon_mesa_to_rc_program(&compiler.Base, &vp->Base->Base);
 
-       vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
-       vpi_insert[i].DstReg.Index = VERT_RESULT_HPOS;
-       vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
-       vpi_insert[i].DstReg.CondMask = COND_TR;
+       if (mesa_vp->IsNVProgram)
+               initialize_NV_registers(&compiler.Base); /* runs after the program has been handed to the compiler */
 
-       vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
-       vpi_insert[i].SrcReg[0].Index = temp_index;
-       vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
-       i++;
+       rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X); /* same index on both sides: presumably limits PSIZ writes to .x -- confirm in rc_move_output */
 
-       vpi_insert[i].Opcode = OPCODE_MOV;
+       if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) { /* FRAG_ATTRIB_MAX appears to mean "no attribute assigned" */
+               rc_copy_output(&compiler.Base,
+                       VERT_RESULT_HPOS,
+                       vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0); /* rebase the fragment texcoord attr onto the vertex result range */
+       }
 
-       vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
-       vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx;
-       vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
-       vpi_insert[i].DstReg.CondMask = COND_TR;
+       if (vp->key.FogAttr != FRAG_ATTRIB_MAX) {
+               rc_move_output(&compiler.Base,
+                       VERT_RESULT_FOGC,
+                       vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0, WRITEMASK_X); /* same rebasing as for WPosAttr above */
+       }
 
-       vpi_insert[i].SrcReg[0].File = PROGRAM_TEMPORARY;
-       vpi_insert[i].SrcReg[0].Index = temp_index;
-       vpi_insert[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
-       i++;
+       r3xx_compile_vertex_program(&compiler);
 
-       free(prog->Instructions);
+       if (vp->code.constants.Count > ctx->Const.VertexProgram.MaxParameters) { /* checked only after compilation has finished */
+               rc_error(&compiler.Base, "Program exceeds constant buffer size limit\n"); /* presumably sets compiler.Base.Error, copied just below -- confirm */
+       }
 
-       prog->Instructions = vpi;
+       vp->error = compiler.Base.Error;
 
-       prog->NumInstructions += i;
-       vpi = &prog->Instructions[prog->NumInstructions - 1];
+       vp->Base->Base.InputsRead = vp->code.InputsRead; /* store the compiled code's masks on the clone */
+       vp->Base->Base.OutputsWritten = vp->code.OutputsWritten;
 
-       assert(vpi->Opcode == OPCODE_END);
-}
+       rc_destroy(&compiler.Base); /* vp->error was copied out before this call */
 
-static void pos_as_texcoord(struct r300_vertex_program *vp,
-                           struct gl_program *prog)
-{
-       struct prog_instruction *vpi;
-       GLuint tempregi = prog->NumTemporaries;
-       /* should do something else if no temps left... */
-       prog->NumTemporaries++;
-
-       for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
-               if (vpi->DstReg.File == PROGRAM_OUTPUT
-                   && vpi->DstReg.Index == VERT_RESULT_HPOS) {
-                       vpi->DstReg.File = PROGRAM_TEMPORARY;
-                       vpi->DstReg.Index = tempregi;
-               }
-       }
-       insert_wpos(vp, prog, tempregi);
+       return vp;
 }
 
-static struct r300_vertex_program *build_program(struct r300_vertex_program_key
-                                                *wanted_key, struct gl_vertex_program
-                                                *mesa_vp, GLint wpos_idx)
+struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx) /* returns (and stores in r300->selected_vp) the variant matching the selected fragment program, building it on a cache miss */
 {
+       r300ContextPtr r300 = R300_CONTEXT(ctx);
+       struct r300_vertex_program_key wanted_key = { 0 }; /* NOTE(review): compared with memcmp below; { 0 } is not guaranteed to zero padding bytes */
+       struct r300_vertex_program_cont *vpc;
        struct r300_vertex_program *vp;
 
-       vp = _mesa_calloc(sizeof(*vp));
-       _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
-       vp->wpos_idx = wpos_idx;
-
-       if (mesa_vp->IsPositionInvariant) {
-               position_invariant(&mesa_vp->Base);
+       vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
+
+       if (!r300->selected_fp) {
+               /* This can happen when GetProgramiv is called to check
+                * whether the program runs natively.
+                *
+                * To be honest, this is not a very good solution,
+                * but solving the problem of reporting good values
+                * for those queries is tough anyway considering that
+                * we recompile vertex programs based on the precise
+                * fragment program that is in use.
+                */
+               r300SelectAndTranslateFragmentShader(ctx); /* NOTE(review): selected_fp is dereferenced right after; assumes this call always sets it */
        }
 
-       if (wpos_idx > -1) {
-               pos_as_texcoord(vp, &mesa_vp->Base);
+       wanted_key.FpReads = r300->selected_fp->InputsRead; /* all three key fields come from the selected fragment program */
+       wanted_key.FogAttr = r300->selected_fp->fog_attr;
+       wanted_key.WPosAttr = r300->selected_fp->wpos_attr;
+
+       for (vp = vpc->progs; vp; vp = vp->next) { /* linear search of the variants already built for this program */
+               if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
+                   == 0) {
+                       return r300->selected_vp = vp; /* cache hit */
+               }
        }
 
-       assert(mesa_vp->Base.NumInstructions);
-       vp->num_temporaries = mesa_vp->Base.NumTemporaries;
-       r300TranslateVertexShader(vp, mesa_vp->Base.Instructions);
+       vp = build_program(ctx, &wanted_key, &vpc->mesa_program); /* cache miss: compile a new variant */
+       vp->next = vpc->progs; /* push it onto the head of the variant list */
+       vpc->progs = vp;
 
-       return vp;
+       return r300->selected_vp = vp;
 }
 
-static void add_outputs(struct r300_vertex_program_key *key, GLint vert)
-{
-       if (key->OutputsWritten & (1 << vert))
-               return;
-
-       key->OutputsWritten |= 1 << vert;
-       key->OutputsAdded |= 1 << vert;
-}
+/*
+ * Treat ptr as a drm_r300_cmd_header_t and raise its vpu.count field to
+ * new_count / 4 if the field is currently smaller. The count is never
+ * lowered by this macro.
+ */
+#define bump_vpu_count(ptr, new_count)   do { \
+               drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr)); \
+               int _nc=(new_count)/4; \
+               if(_nc>_p->vpu.count)_p->vpu.count=_nc; \
+       } while(0)
 
-void r300SelectVertexShader(r300ContextPtr r300)
+/**
+ * Copy code->body.d[0 .. code->length - 1] into one of the vpi, vpp or vps
+ * command buffers of r300->hw.
+ *
+ * Bits 8..11 of dest choose the buffer (0: vpi, 2: vpp, 4: vps) and the low
+ * 8 bits of dest give the starting offset in units of 4 entries. After the
+ * copy, the buffer's vpu count is raised to cover the written range.
+ *
+ * code->length must be positive and a multiple of 4 (asserted). Any other
+ * buffer selector prints a message to stderr and terminates the process.
+ */
+static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_program_code *code)
 {
-       GLcontext *ctx = ctx = r300->radeon.glCtx;
-       GLuint InputsRead;
-       struct r300_vertex_program_key wanted_key = { 0 };
-       GLint i;
-       struct r300_vertex_program_cont *vpc;
-       struct r300_vertex_program *vp;
-       GLint wpos_idx;
-
-       vpc =
-           (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
-       InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
+       int i;
 
-       wpos_idx = -1;
-       if (InputsRead & FRAG_BIT_WPOS) {
-               for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
-                       if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
-                               break;
+       assert((code->length > 0) && (code->length % 4 == 0));
 
-               if (i == ctx->Const.MaxTextureUnits) {
-                       fprintf(stderr, "\tno free texcoord found\n");
-                       _mesa_exit(-1);
-               }
+       R300_STATECHANGE( r300, vap_flush );
 
-               InputsRead |= (FRAG_BIT_TEX0 << i);
-               wpos_idx = i;
+       /* Dispatch on the buffer selector held in bits 8..11 of dest. */
+       switch ((dest >> 8) & 0xf) {
+               case 0:
+                       R300_STATECHANGE(r300, vpi);
+                       for (i = 0; i < code->length; i++)
+                               r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
+                       bump_vpu_count(r300->hw.vpi.cmd, code->length + 4 * (dest & 0xff));
+                       break;
+               case 2:
+                       R300_STATECHANGE(r300, vpp);
+                       for (i = 0; i < code->length; i++)
+                               r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
+                       bump_vpu_count(r300->hw.vpp.cmd, code->length + 4 * (dest & 0xff));
+                       break;
+               case 4:
+                       R300_STATECHANGE(r300, vps);
+                       for (i = 0; i < code->length; i++)
+                               r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
+                       bump_vpu_count(r300->hw.vps.cmd, code->length + 4 * (dest & 0xff));
+                       break;
+               default:
+                       fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest);
+                       exit(-1);
        }
-       wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
-       wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
+}
 
-       add_outputs(&wanted_key, VERT_RESULT_HPOS);
+/**
+ * Program the r300 hardware state from the currently selected vertex
+ * program (rmesa->selected_vp).
+ *
+ * In order: resets the vpu counts of the vpp, vpi and vps command buffers,
+ * fills the vpp buffer through r300VertexProgUpdateParams(), emits the
+ * program code at R300_PVS_CODE_START, calls r300VapCntl() with the number
+ * of inputs read, outputs written and temporaries, and writes the three
+ * PVS_CNTL words of the pvs command buffer.
+ */
+void r300SetupVertexProgram(r300ContextPtr rmesa)
+{
+       GLcontext *ctx = rmesa->radeon.glCtx;
+       struct r300_vertex_program *prog = rmesa->selected_vp;
+       int inst_count = 0;
+       int param_count = 0;
 
-       if (InputsRead & FRAG_BIT_COL0) {
-               add_outputs(&wanted_key, VERT_RESULT_COL0);
-       }
+       /* Reset state, in case we don't use something */
+       ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
+       ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
+       ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
 
-       if (InputsRead & FRAG_BIT_COL1) {
-               add_outputs(&wanted_key, VERT_RESULT_COL1);
-       }
+       R300_STATECHANGE(rmesa, vap_flush);
+       R300_STATECHANGE(rmesa, vpp);
+       param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
+       bump_vpu_count(rmesa->hw.vpp.cmd, param_count);
+       /* NOTE(review): presumably converts a count of floats into a count
+        * of 4-component constants for PVS_CNTL_2 below - confirm.
+        */
+       param_count /= 4;
 
-       for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
-               if (InputsRead & (FRAG_BIT_TEX0 << i)) {
-                       add_outputs(&wanted_key, VERT_RESULT_TEX0 + i);
-               }
-       }
+       r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code));
+       /* NOTE(review): looks like the index of the last instruction,
+        * assuming 4 code words per instruction - confirm.
+        */
+       inst_count = (prog->code.length / 4) - 1;
 
-       if (vpc->mesa_program.IsPositionInvariant) {
-               /* we wan't position don't we ? */
-               wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
-       }
+       r300VapCntl(rmesa, _mesa_bitcount(prog->code.InputsRead),
+                                _mesa_bitcount(prog->code.OutputsWritten), prog->code.num_temporaries);
 
-       for (vp = vpc->progs; vp; vp = vp->next)
-               if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key))
-                   == 0) {
-                       r300->selected_vp = vp;
-                       return;
-               }
-       //_mesa_print_program(&vpc->mesa_program.Base);
+       R300_STATECHANGE(rmesa, pvs);
+       rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
+                               (inst_count << R300_PVS_LAST_INST_SHIFT);
 
-       vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx);
-       vp->next = vpc->progs;
-       vpc->progs = vp;
-       r300->selected_vp = vp;
+       rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
+       rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
 }