Merge branch 'mesa_7_5_branch'
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
index 1fea88a85397af57fc8ecb659c4d4d62c941b26d..c41a8fdd621a373f3288febfea349268982ad8de 100644 (file)
@@ -1,6 +1,7 @@
 /**************************************************************************
 
-Copyright (C) 2005 Aapo Tahkola.
+Copyright (C) 2005  Aapo Tahkola <aet@rasterburn.org>
+Copyright (C) 2008  Oliver McFadden <z3ro.geek@gmail.com>
 
 All Rights Reserved.
 
@@ -25,134 +26,53 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 **************************************************************************/
 
-/**
- * \file
- *
- * \author Aapo Tahkola <aet@rasterburn.org>
- */
+/* Radeon R5xx Acceleration, Revision 1.2 */
 
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
-#include "program.h"
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
 #include "shader/prog_instruction.h"
 #include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
 #include "shader/prog_statevars.h"
 #include "tnl/tnl.h"
 
 #include "r300_context.h"
-
-#if SWIZZLE_X != VSF_IN_COMPONENT_X || \
-    SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
-    SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
-    SWIZZLE_W != VSF_IN_COMPONENT_W || \
-    SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
-    SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
-    WRITEMASK_X != VSF_FLAG_X || \
-    WRITEMASK_Y != VSF_FLAG_Y || \
-    WRITEMASK_Z != VSF_FLAG_Z || \
-    WRITEMASK_W != VSF_FLAG_W
-#error Cannot change these!
-#endif
+#include "r300_state.h"
 
 /* TODO: Get rid of t_src_class call */
 #define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \
-                      ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \
-                        t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \
-                       (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \
-                        t_src_class(b.File) == VSF_IN_CLASS_ATTR)))) \
-
-#define ZERO_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4))
-
-#define ZERO_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4))
-
-#define ZERO_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4))
-
-#define ONE_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4))
-
-#define ONE_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4))
-
-#define ONE_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4))
-
-/* DP4 version seems to trigger some hw peculiarity */
-//#define PREFER_DP4
+                      ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \
+                        t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \
+                       (t_src_class(a.File) == PVS_SRC_REG_INPUT && \
+                        t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \
+
+/*
+ * Take an already-setup and valid source then swizzle it appropriately to
+ * obtain a constant ZERO or ONE source.
+ */
+#define __CONST(x, y)  \
+       (PVS_SRC_OPERAND(t_src_index(vp, &src[x]),      \
+                          t_swizzle(y),        \
+                          t_swizzle(y),        \
+                          t_swizzle(y),        \
+                          t_swizzle(y),        \
+                          t_src_class(src[x].File), \
+                          VSF_FLAG_NONE) | (src[x].RelAddr << 4))
 
 #define FREE_TEMPS() \
        do { \
-               if(u_temp_i < vp->num_temporaries) { \
-                       WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \
-                       vp->native = GL_FALSE; \
+               int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \
+               if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \
+                       WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \
+                       vp->error = GL_TRUE; \
                } \
                u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
        } while (0)
 
-#define SCALAR_FLAG (1<<31)
-#define FLAG_MASK (1<<31)
-#define OP_MASK        (0xf)           /* we are unlikely to have more than 15 */
-#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
-
-static struct {
-       char *name;
-       int opcode;
-       unsigned long ip;       /* number of input operands and flags */
-} op_names[] = {
-       /* *INDENT-OFF* */
-       OPN(ABS, 1),
-       OPN(ADD, 2),
-       OPN(ARL, 1 | SCALAR_FLAG),
-       OPN(DP3, 2),
-       OPN(DP4, 2),
-       OPN(DPH, 2),
-       OPN(DST, 2),
-       OPN(EX2, 1 | SCALAR_FLAG),
-       OPN(EXP, 1 | SCALAR_FLAG),
-       OPN(FLR, 1),
-       OPN(FRC, 1),
-       OPN(LG2, 1 | SCALAR_FLAG),
-       OPN(LIT, 1),
-       OPN(LOG, 1 | SCALAR_FLAG),
-       OPN(MAD, 3),
-       OPN(MAX, 2),
-       OPN(MIN, 2),
-       OPN(MOV, 1),
-       OPN(MUL, 2),
-       OPN(POW, 2 | SCALAR_FLAG),
-       OPN(RCP, 1 | SCALAR_FLAG),
-       OPN(RSQ, 1 | SCALAR_FLAG),
-       OPN(SGE, 2),
-       OPN(SLT, 2),
-       OPN(SUB, 2),
-       OPN(SWZ, 1),
-       OPN(XPD, 2),
-       OPN(RCC, 0),    //extra
-       OPN(PRINT, 0),
-       OPN(END, 0)
-       /* *INDENT-ON* */
-};
-
-#undef OPN
-
 int r300VertexProgUpdateParams(GLcontext * ctx,
-                              struct r300_vertex_program_cont *vp,
-                              float *dst)
+                              struct r300_vertex_program_cont *vp, float *dst)
 {
        int pi;
        struct gl_vertex_program *mesa_vp = &vp->mesa_program;
@@ -183,7 +103,6 @@ int r300VertexProgUpdateParams(GLcontext * ctx,
        paramList = mesa_vp->Base.Parameters;
        for (pi = 0; pi < paramList->NumParameters; pi++) {
                switch (paramList->Parameters[pi].Type) {
-
                case PROGRAM_STATE_VAR:
                case PROGRAM_NAMED_PARAM:
                        //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
@@ -193,7 +112,6 @@ int r300VertexProgUpdateParams(GLcontext * ctx,
                        *dst++ = paramList->ParameterValues[pi][2];
                        *dst++ = paramList->ParameterValues[pi][3];
                        break;
-
                default:
                        _mesa_problem(NULL, "Bad param type in %s",
                                      __FUNCTION__);
@@ -210,16 +128,16 @@ static unsigned long t_dst_mask(GLuint mask)
        return mask & VSF_FLAG_ALL;
 }
 
-static unsigned long t_dst_class(enum register_file file)
+static unsigned long t_dst_class(gl_register_file file)
 {
 
        switch (file) {
        case PROGRAM_TEMPORARY:
-               return VSF_OUT_CLASS_TMP;
+               return PVS_DST_REG_TEMPORARY;
        case PROGRAM_OUTPUT:
-               return VSF_OUT_CLASS_RESULT;
+               return PVS_DST_REG_OUT;
        case PROGRAM_ADDRESS:
-               return VSF_OUT_CLASS_ADDR;
+               return PVS_DST_REG_A0;
                /*
                   case PROGRAM_INPUT:
                   case PROGRAM_LOCAL_PARAM:
@@ -245,21 +163,19 @@ static unsigned long t_dst_index(struct r300_vertex_program *vp,
        return dst->Index;
 }
 
-static unsigned long t_src_class(enum register_file file)
+static unsigned long t_src_class(gl_register_file file)
 {
-
        switch (file) {
        case PROGRAM_TEMPORARY:
-               return VSF_IN_CLASS_TMP;
-
+               return PVS_SRC_REG_TEMPORARY;
        case PROGRAM_INPUT:
-               return VSF_IN_CLASS_ATTR;
-
+               return PVS_SRC_REG_INPUT;
        case PROGRAM_LOCAL_PARAM:
        case PROGRAM_ENV_PARAM:
        case PROGRAM_NAMED_PARAM:
+       case PROGRAM_CONSTANT:
        case PROGRAM_STATE_VAR:
-               return VSF_IN_CLASS_PARAM;
+               return PVS_SRC_REG_CONSTANT;
                /*
                   case PROGRAM_OUTPUT:
                   case PROGRAM_WRITE_ONLY:
@@ -272,7 +188,7 @@ static unsigned long t_src_class(enum register_file file)
        }
 }
 
-static inline unsigned long t_swizzle(GLubyte swizzle)
+static INLINE unsigned long t_swizzle(GLubyte swizzle)
 {
 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
        return swizzle;
@@ -284,8 +200,8 @@ static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
        int i;
 
        if (vp == NULL) {
-               fprintf(stderr, "vp null in call to %s from %s\n",
-                       __FUNCTION__, caller);
+               fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__,
+                       caller);
                return;
        }
 
@@ -300,21 +216,8 @@ static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
 static unsigned long t_src_index(struct r300_vertex_program *vp,
                                 struct prog_src_register *src)
 {
-       int i;
-       int max_reg = -1;
-
        if (src->File == PROGRAM_INPUT) {
-               if (vp->inputs[src->Index] != -1)
-                       return vp->inputs[src->Index];
-
-               for (i = 0; i < VERT_ATTRIB_MAX; i++)
-                       if (vp->inputs[i] > max_reg)
-                               max_reg = vp->inputs[i];
-
-               vp->inputs[src->Index] = max_reg + 1;
-
-               //vp_dump_inputs(vp, __FUNCTION__);
-
+               assert(vp->inputs[src->Index] != -1);
                return vp->inputs[src->Index];
        } else {
                if (src->Index < 0) {
@@ -331,600 +234,777 @@ static unsigned long t_src_index(struct r300_vertex_program *vp,
 static unsigned long t_src(struct r300_vertex_program *vp,
                           struct prog_src_register *src)
 {
-       /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
+       /* src->Negate uses the NEGATE_ flags from prog_instruction.h,
         * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
         */
-       return MAKE_VSF_SOURCE(t_src_index(vp, src),
+       return PVS_SRC_OPERAND(t_src_index(vp, src),
                               t_swizzle(GET_SWZ(src->Swizzle, 0)),
                               t_swizzle(GET_SWZ(src->Swizzle, 1)),
                               t_swizzle(GET_SWZ(src->Swizzle, 2)),
                               t_swizzle(GET_SWZ(src->Swizzle, 3)),
                               t_src_class(src->File),
-                              src->NegateBase) | (src->RelAddr << 4);
+                              src->Negate) | (src->RelAddr << 4);
 }
 
 static unsigned long t_src_scalar(struct r300_vertex_program *vp,
                                  struct prog_src_register *src)
 {
-       /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
+       /* src->Negate uses the NEGATE_ flags from prog_instruction.h,
         * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
         */
-       return MAKE_VSF_SOURCE(t_src_index(vp, src),
+       return PVS_SRC_OPERAND(t_src_index(vp, src),
                               t_swizzle(GET_SWZ(src->Swizzle, 0)),
                               t_swizzle(GET_SWZ(src->Swizzle, 0)),
                               t_swizzle(GET_SWZ(src->Swizzle, 0)),
                               t_swizzle(GET_SWZ(src->Swizzle, 0)),
                               t_src_class(src->File),
-                              src->
-                              NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+                              src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
            (src->RelAddr << 4);
 }
 
-static unsigned long t_opcode(enum prog_opcode opcode)
+static GLboolean valid_dst(struct r300_vertex_program *vp,
+                          struct prog_dst_register *dst)
 {
-
-       switch (opcode) {
-       /* *INDENT-OFF* */
-       case OPCODE_ARL: return R300_VPI_OUT_OP_ARL;
-       case OPCODE_DST: return R300_VPI_OUT_OP_DST;
-       case OPCODE_EX2: return R300_VPI_OUT_OP_EX2;
-       case OPCODE_EXP: return R300_VPI_OUT_OP_EXP;
-       case OPCODE_FRC: return R300_VPI_OUT_OP_FRC;
-       case OPCODE_LG2: return R300_VPI_OUT_OP_LG2;
-       case OPCODE_LOG: return R300_VPI_OUT_OP_LOG;
-       case OPCODE_MAX: return R300_VPI_OUT_OP_MAX;
-       case OPCODE_MIN: return R300_VPI_OUT_OP_MIN;
-       case OPCODE_MUL: return R300_VPI_OUT_OP_MUL;
-       case OPCODE_RCP: return R300_VPI_OUT_OP_RCP;
-       case OPCODE_RSQ: return R300_VPI_OUT_OP_RSQ;
-       case OPCODE_SGE: return R300_VPI_OUT_OP_SGE;
-       case OPCODE_SLT: return R300_VPI_OUT_OP_SLT;
-       case OPCODE_DP4: return R300_VPI_OUT_OP_DOT;
-       /* *INDENT-ON* */
-
-       default:
-               fprintf(stderr, "%s: Should not be called with opcode %d!",
-                       __FUNCTION__, opcode);
+       if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
+               return GL_FALSE;
+       } else if (dst->File == PROGRAM_ADDRESS) {
+               assert(dst->Index == 0);
        }
-       _mesa_exit(-1);
-       return 0;
+
+       return GL_TRUE;
 }
 
-static unsigned long op_operands(enum prog_opcode opcode)
+static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
-       int i;
+       // ABS(x) is emitted as MAX(x, -x): MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg X neg Y neg Z neg W
 
-       /* Can we trust mesas opcodes to be in order ? */
-       for (i = 0; i < sizeof(op_names) / sizeof(*op_names); i++)
-               if (op_names[i].opcode == opcode)
-                       return op_names[i].ip;
+       inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
+                                 t_src_class(src[0].File),
+                                 (!src[0].
+                                  Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[0].RelAddr << 4);
+       inst[3] = 0;
 
-       fprintf(stderr, "op %d not found in op_names\n", opcode);
-       _mesa_exit(-1);
-       return 0;
+       return inst;
 }
 
-static GLboolean valid_dst(struct r300_vertex_program *vp,
-                          struct prog_dst_register *dst)
+static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
-       if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
-               return GL_FALSE;
-       } else if (dst->File == PROGRAM_ADDRESS) {
-               assert(dst->Index == 0);
-       }
+       inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = t_src(vp, &src[1]);
+       inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+       return inst;
+}
 
-       return GL_TRUE;
+static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{
+       inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = __CONST(0, SWIZZLE_ZERO);
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+       return inst;
+}
+
+static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{
+       //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
+
+       inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
+                                 SWIZZLE_ZERO,
+                                 t_src_class(src[0].File),
+                                 src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
+           (src[0].RelAddr << 4);
+       inst[2] =
+           PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
+                           t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
+                           t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
+                           t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO,
+                           t_src_class(src[1].File),
+                           src[1].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
+           (src[1].RelAddr << 4);
+       inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+       return inst;
 }
 
-static void t_opcode_pow(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_POW, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       o_inst->src[0] = t_src_scalar(vp, &src[0]);
-       o_inst->src[1] = ZERO_SRC_0;
-       o_inst->src[2] = t_src_scalar(vp, &src[1]);
+       inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = t_src(vp, &src[1]);
+       inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+       return inst;
 }
 
-static void t_opcode_mov(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
-       //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
+       //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
+       inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
+                                 PVS_SRC_SELECT_FORCE_1,
+                                 t_src_class(src[0].File),
+                                 src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
+           (src[0].RelAddr << 4);
+       inst[2] = t_src(vp, &src[1]);
+       inst[3] = __CONST(1, SWIZZLE_ZERO);
 
-#if 1
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       o_inst->src[0] = t_src(vp, &src[0]);
-       o_inst->src[1] = ZERO_SRC_0;
-       o_inst->src[2] = ZERO_SRC_0;
-#else
-       hw_op =
-           (src[0].File ==
-            PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
-           R300_VPI_OUT_OP_MAD;
-
-       o_inst->opcode =
-           MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       o_inst->src[0] = t_src(vp, &src[0]);
-       o_inst->src[1] = ONE_SRC_0;
-       o_inst->src[2] = ZERO_SRC_0;
-#endif
+       return inst;
 }
 
-static void t_opcode_add(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
-       unsigned long hw_op;
-
-#if 1
-       hw_op = (src[0].File == PROGRAM_TEMPORARY
-                && src[1].File ==
-                PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
-           R300_VPI_OUT_OP_MAD;
-
-       o_inst->opcode =
-           MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       o_inst->src[0] = ONE_SRC_0;
-       o_inst->src[1] = t_src(vp, &src[0]);
-       o_inst->src[2] = t_src(vp, &src[1]);
-#else
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       o_inst->src[0] = t_src(vp, &src[0]);
-       o_inst->src[1] = t_src(vp, &src[1]);
-       o_inst->src[2] = ZERO_SRC_1;
+       inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = t_src(vp, &src[1]);
+       inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+       return inst;
+}
 
-#endif
+static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{
+       inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX,
+                                    GL_TRUE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src_scalar(vp, &src[0]);
+       inst[2] = __CONST(0, SWIZZLE_ZERO);
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+       return inst;
 }
 
-static void t_opcode_mad(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
-       unsigned long hw_op;
-
-       hw_op = (src[0].File == PROGRAM_TEMPORARY
-                && src[1].File == PROGRAM_TEMPORARY
-                && src[2].File ==
-                PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
-           R300_VPI_OUT_OP_MAD;
-
-       o_inst->opcode =
-           MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       o_inst->src[0] = t_src(vp, &src[0]);
-       o_inst->src[1] = t_src(vp, &src[1]);
-       o_inst->src[2] = t_src(vp, &src[2]);
+       inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX,
+                                    GL_TRUE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src_scalar(vp, &src[0]);
+       inst[2] = __CONST(0, SWIZZLE_ZERO);
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+       return inst;
 }
 
-static void t_opcode_mul(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3],
+                                     int *u_temp_i)
 {
-       unsigned long hw_op;
+       /* FLR(x) is built as x - FRC(x) via a scratch temporary: FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
+          ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg X neg Y neg Z neg W */
 
-       // HW mul can take third arg but appears to have some other limitations.
+       inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    *u_temp_i,
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    PVS_DST_REG_TEMPORARY);
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = __CONST(0, SWIZZLE_ZERO);
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+       inst += 4;
+
+       inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = PVS_SRC_OPERAND(*u_temp_i,
+                                 PVS_SRC_SELECT_X,
+                                 PVS_SRC_SELECT_Y,
+                                 PVS_SRC_SELECT_Z,
+                                 PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY,
+                                 /* Not 100% sure about this */
+                                 (!src[0].
+                                  Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE
+                                 /*VSF_FLAG_ALL */ );
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+       (*u_temp_i)--;
+
+       return inst;
+}
+
+static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{
+       inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = __CONST(0, SWIZZLE_ZERO);
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+       return inst;
+}
 
-       hw_op = (src[0].File == PROGRAM_TEMPORARY
-                && src[1].File ==
-                PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
-           R300_VPI_OUT_OP_MAD;
+static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{       /* LG2: encode one ME_LOG_BASE2_FULL_DX instruction into inst[0..3] */
+       // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X} -- swizzle component 0 of src[0] feeds all four lanes
 
-       o_inst->opcode =
-           MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       o_inst->src[0] = t_src(vp, &src[0]);
-       o_inst->src[1] = t_src(vp, &src[1]);
+       inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX,
+                                    GL_TRUE,   /* GL_TRUE accompanies every ME_* opcode here; presumably marks a math-engine op -- confirm */
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),     /* built by hand so that component 0 is replicated */
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                 t_src_class(src[0].File),
+                                 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |        /* negate all lanes when src[0].Negate is set */
+           (src[0].RelAddr << 4);      /* RelAddr is OR'ed in at bit 4 */
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* no second operand: SWIZZLE_ZERO placeholder */
+       inst[3] = __CONST(0, SWIZZLE_ZERO);     /* no third operand: SWIZZLE_ZERO placeholder */
 
-       o_inst->src[2] = ZERO_SRC_1;
+       return inst;    /* same pointer the caller passed in */
 }
 
-static void t_opcode_dp3(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
-       //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
+       //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} -- all three operands are reorderings of src[0]
 
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-
-       o_inst->src[0] =
-           MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
-                           SWIZZLE_ZERO, t_src_class(src[0].File),
-                           src[0].
-                           NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
+       inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, /* LIT: one ME_LIGHT_COEFF_DX instruction in inst[0..3] */
+                                    GL_TRUE,   /* GL_TRUE accompanies every ME_* opcode here; presumably marks a math-engine op -- confirm */
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       /* NOTE: User-supplied swizzles might not work: the third lane is forced to 0 and the others are reordered below. */
+       inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)),      // X
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),        // W
+                                 PVS_SRC_SELECT_FORCE_0,       // Z
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),        // Y
+                                 t_src_class(src[0].File),
+                                 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |        /* negate all lanes when src[0].Negate is set */
+           (src[0].RelAddr << 4);      /* RelAddr is OR'ed in at bit 4 */
+       inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)),      // Y
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),        // W
+                                 PVS_SRC_SELECT_FORCE_0,       // Z
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),        // X
+                                 t_src_class(src[0].File),
+                                 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[0].RelAddr << 4);
+       inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)),      // Y
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),        // X
+                                 PVS_SRC_SELECT_FORCE_0,       // Z
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),        // W
+                                 t_src_class(src[0].File),
+                                 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
            (src[0].RelAddr << 4);
 
-       o_inst->src[1] =
-           MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
-                           SWIZZLE_ZERO, t_src_class(src[1].File),
-                           src[1].
-                           NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
-           (src[1].RelAddr << 4);
+       return inst;    /* same pointer the caller passed in */
+}
 
-       o_inst->src[2] = ZERO_SRC_1;
+static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{       /* LOG: encode one ME_LOG_BASE2_DX instruction into inst[0..3] */
+       inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX,   /* opcode + destination word */
+                                    GL_TRUE,   /* GL_TRUE accompanies every ME_* opcode here; presumably marks a math-engine op -- confirm */
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src_scalar(vp, &src[0]);    /* scalar form of src[0] (t_src_scalar, not t_src) */
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* no second operand: SWIZZLE_ZERO placeholder */
+       inst[3] = __CONST(0, SWIZZLE_ZERO);     /* no third operand: SWIZZLE_ZERO placeholder */
+
+       return inst;    /* same pointer the caller passed in */
 }
 
-static void t_opcode_sub(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
-       unsigned long hw_op;
+       inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,    /* MAD: one PVS_MACRO_OP_2CLK_MADD instruction in inst[0..3] */
+                                    GL_FALSE,
+                                    GL_TRUE,   /* only translator in this section passing GL_TRUE as 3rd arg; presumably the macro-op flag -- confirm */
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* all three source slots carry real operands */
+       inst[2] = t_src(vp, &src[1]);
+       inst[3] = t_src(vp, &src[2]);
+
+       return inst;    /* same pointer the caller passed in */
+}
 
-       //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
+static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{       /* MAX: encode one VE_MAXIMUM instruction into inst[0..3] */
+       inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,        /* opcode + destination word */
+                                    GL_FALSE,  /* flag meanings come from PVS_OP_DST_OPERAND (not visible here) */
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* first operand */
+       inst[2] = t_src(vp, &src[1]);   /* second operand */
+       inst[3] = __CONST(1, SWIZZLE_ZERO);     /* no third operand: SWIZZLE_ZERO placeholder; NOTE(review): first arg is 1 here, 0 in the one-source ops -- confirm intent */
+
+       return inst;    /* same pointer the caller passed in */
+}
 
-#if 1
-       hw_op = (src[0].File == PROGRAM_TEMPORARY
-                && src[1].File ==
-                PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
-           R300_VPI_OUT_OP_MAD;
-
-       o_inst->opcode =
-           MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       o_inst->src[0] = t_src(vp, &src[0]);
-       o_inst->src[1] = ONE_SRC_0;
-       o_inst->src[2] =
-           MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
-                           t_src_class(src[1].File),
-                           (!src[1].
-                            NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
-           (src[1].RelAddr << 4);
-#else
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-
-       o_inst->src[0] = t_src(vp, &src[0]);
-       o_inst->src[1] =
-           MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
-                           t_src_class(src[1].File),
-                           (!src[1].
-                            NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
-           (src[1].RelAddr << 4);
-       o_inst->src[2] = 0;
-#endif
+static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{       /* MIN: encode one VE_MINIMUM instruction into inst[0..3] */
+       inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM,        /* opcode + destination word */
+                                    GL_FALSE,  /* flag meanings come from PVS_OP_DST_OPERAND (not visible here) */
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* first operand */
+       inst[2] = t_src(vp, &src[1]);   /* second operand */
+       inst[3] = __CONST(1, SWIZZLE_ZERO);     /* no third operand: SWIZZLE_ZERO placeholder; NOTE(review): first arg is 1 here, 0 in the one-source ops -- confirm intent */
+
+       return inst;    /* same pointer the caller passed in */
 }
 
-static void t_opcode_abs(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
-       //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
+       //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} -- i.e. MOV is emitted as src[0] + 0
 
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-
-       o_inst->src[0] = t_src(vp, &src[0]);
-       o_inst->src[1] =
-           MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
-                           t_src_class(src[0].File),
-                           (!src[0].
-                            NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
-           (src[0].RelAddr << 4);
-       o_inst->src[2] = 0;
+       inst[0] = PVS_OP_DST_OPERAND(VE_ADD,    /* one VE_ADD instruction in inst[0..3] */
+                                    GL_FALSE,  /* flag meanings come from PVS_OP_DST_OPERAND (not visible here) */
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* the value being moved */
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* the zero addend */
+       inst[3] = __CONST(0, SWIZZLE_ZERO);     /* no third operand: SWIZZLE_ZERO placeholder */
+
+       return inst;    /* same pointer the caller passed in */
 }
 
-static void t_opcode_flr(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3], int u_temp_i)
+static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
-       /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
-          ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
+       inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY,       /* MUL: one VE_MULTIPLY instruction in inst[0..3] */
+                                    GL_FALSE,  /* flag meanings come from PVS_OP_DST_OPERAND (not visible here) */
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* first factor */
+       inst[2] = t_src(vp, &src[1]);   /* second factor */
+       inst[3] = __CONST(1, SWIZZLE_ZERO);     /* no third operand: SWIZZLE_ZERO placeholder; NOTE(review): first arg is 1 here, 0 in the one-source ops -- confirm intent */
+
+       return inst;    /* same pointer the caller passed in */
+}
 
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, u_temp_i,
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       VSF_OUT_CLASS_TMP);
-
-       o_inst->src[0] = t_src(vp, &src[0]);
-       o_inst->src[1] = ZERO_SRC_0;
-       o_inst->src[2] = ZERO_SRC_0;
-       o_inst++;
-
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-
-       o_inst->src[0] = t_src(vp, &src[0]);
-       o_inst->src[1] =
-           MAKE_VSF_SOURCE(u_temp_i, VSF_IN_COMPONENT_X,
-                           VSF_IN_COMPONENT_Y, VSF_IN_COMPONENT_Z,
-                           VSF_IN_COMPONENT_W, VSF_IN_CLASS_TMP,
-                           /* Not 100% sure about this */
-                           (!src[0].
-                            NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE
-                           /*VSF_FLAG_ALL */ );
-
-       o_inst->src[2] = ZERO_SRC_0;
-       u_temp_i--;
+static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{       /* POW: encode one ME_POWER_FUNC_FF instruction into inst[0..3] */
+       inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,  /* opcode + destination word */
+                                    GL_TRUE,   /* GL_TRUE accompanies every ME_* opcode here; presumably marks a math-engine op -- confirm */
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src_scalar(vp, &src[0]);    /* scalar form of src[0] */
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* middle slot is the SWIZZLE_ZERO placeholder ... */
+       inst[3] = t_src_scalar(vp, &src[1]);    /* ... and src[1] goes in the last slot; NOTE(review): slot order presumably required by the opcode -- confirm */
+
+       return inst;    /* same pointer the caller passed in */
 }
 
-static void t_opcode_lg2(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
-       // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
+       inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX,       /* RCP: one ME_RECIP_DX instruction in inst[0..3] */
+                                    GL_TRUE,   /* GL_TRUE accompanies every ME_* opcode here; presumably marks a math-engine op -- confirm */
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src_scalar(vp, &src[0]);    /* scalar form of src[0] (t_src_scalar, not t_src) */
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* no second operand: SWIZZLE_ZERO placeholder */
+       inst[3] = __CONST(0, SWIZZLE_ZERO);     /* no third operand: SWIZZLE_ZERO placeholder */
+
+       return inst;    /* same pointer the caller passed in */
+}
 
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_LG2, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-
-       o_inst->src[0] =
-           MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                           t_src_class(src[0].File),
-                           src[0].
-                           NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
-           (src[0].RelAddr << 4);
-       o_inst->src[1] = ZERO_SRC_0;
-       o_inst->src[2] = ZERO_SRC_0;
+static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{       /* RSQ: encode one ME_RECIP_SQRT_DX instruction into inst[0..3] */
+       inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX,  /* opcode + destination word */
+                                    GL_TRUE,   /* GL_TRUE accompanies every ME_* opcode here; presumably marks a math-engine op -- confirm */
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src_scalar(vp, &src[0]);    /* scalar form of src[0] (t_src_scalar, not t_src) */
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* no second operand: SWIZZLE_ZERO placeholder */
+       inst[3] = __CONST(0, SWIZZLE_ZERO);     /* no third operand: SWIZZLE_ZERO placeholder */
+
+       return inst;    /* same pointer the caller passed in */
 }
 
-static void t_opcode_lit(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
-       //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
+       inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL, /* SGE: one VE_SET_GREATER_THAN_EQUAL instruction in inst[0..3] */
+                                    GL_FALSE,  /* flag meanings come from PVS_OP_DST_OPERAND (not visible here) */
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* left-hand side of the comparison */
+       inst[2] = t_src(vp, &src[1]);   /* right-hand side of the comparison */
+       inst[3] = __CONST(1, SWIZZLE_ZERO);     /* no third operand: SWIZZLE_ZERO placeholder; NOTE(review): first arg is 1 here, 0 in the one-source ops -- confirm intent */
+
+       return inst;    /* same pointer the caller passed in */
+}
 
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_LIT, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       /* NOTE: Users swizzling might not work. */
-       o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)),       // x
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
-                                        VSF_IN_COMPONENT_ZERO, // z
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
-                                        t_src_class(src[0].File),
-                                        src[0].
-                                        NegateBase ? VSF_FLAG_ALL :
-                                        VSF_FLAG_NONE) | (src[0].
-                                                          RelAddr << 4);
-       o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)),       // y
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
-                                        VSF_IN_COMPONENT_ZERO, // z
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
-                                        t_src_class(src[0].File),
-                                        src[0].
-                                        NegateBase ? VSF_FLAG_ALL :
-                                        VSF_FLAG_NONE) | (src[0].
-                                                          RelAddr << 4);
-       o_inst->src[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)),       // y
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
-                                        VSF_IN_COMPONENT_ZERO, // z
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
-                                        t_src_class(src[0].File),
-                                        src[0].
-                                        NegateBase ? VSF_FLAG_ALL :
-                                        VSF_FLAG_NONE) | (src[0].
-                                                          RelAddr << 4);
+static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{       /* SLT: encode one VE_SET_LESS_THAN instruction into inst[0..3] */
+       inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN,  /* opcode + destination word */
+                                    GL_FALSE,  /* flag meanings come from PVS_OP_DST_OPERAND (not visible here) */
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* left-hand side of the comparison */
+       inst[2] = t_src(vp, &src[1]);   /* right-hand side of the comparison */
+       inst[3] = __CONST(1, SWIZZLE_ZERO);     /* no third operand: SWIZZLE_ZERO placeholder; NOTE(review): first arg is 1 here, 0 in the one-source ops -- confirm intent */
+
+       return inst;    /* same pointer the caller passed in */
 }
 
-static void t_opcode_dph(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
-       //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-
-       o_inst->src[0] =
-           MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
-                           VSF_IN_COMPONENT_ONE, t_src_class(src[0].File),
-                           src[0].
-                           NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
-           (src[0].RelAddr << 4);
-       o_inst->src[1] = t_src(vp, &src[1]);
-       o_inst->src[2] = ZERO_SRC_1;
+       //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W -- SUB is emitted as src[0] plus negated src[1]
+
+#if 0  /* disabled variant: plain VE_ADD of src[0] and src[1] with its negate flag flipped */
+       inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
+                                 t_src_class(src[1].File),
+                                 (!src[1].
+                                  Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[1].RelAddr << 4);
+       inst[3] = 0;
+#else  /* compiled variant: VE_MULTIPLY_ADD with a SWIZZLE_ONE middle operand, i.e. src[0] * 1 + (-src[1]) */
+       inst[0] =
+           PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+                              GL_FALSE,        /* flag meanings come from PVS_OP_DST_OPERAND (not visible here) */
+                              GL_FALSE,
+                              t_dst_index(vp, &vpi->DstReg),
+                              t_dst_mask(vpi->DstReg.WriteMask),
+                              t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* minuend */
+       inst[2] = __CONST(0, SWIZZLE_ONE);      /* multiplier operand built with SWIZZLE_ONE */
+       inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),     /* subtrahend: src[1] with its own swizzle ... */
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
+                                 t_src_class(src[1].File),
+                                 (!src[1].
+                                  Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |    /* ... and inverted negate flag: this is what makes it a subtraction */
+           (src[1].RelAddr << 4);      /* RelAddr is OR'ed in at bit 4 */
+#endif
+
+       return inst;    /* same pointer the caller passed in */
+}
+
+static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{       /* SWZ: same encoding as r300TranslateOpcodeMOV -- a VE_ADD of src[0] and zero */
+       //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
+
+       inst[0] = PVS_OP_DST_OPERAND(VE_ADD,    /* opcode + destination word */
+                                    GL_FALSE,  /* flag meanings come from PVS_OP_DST_OPERAND (not visible here) */
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* the swizzle itself is applied by t_src() -- presumably via src[0].Swizzle; confirm */
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* the zero addend */
+       inst[3] = __CONST(0, SWIZZLE_ZERO);     /* no third operand: SWIZZLE_ZERO placeholder */
+
+       return inst;    /* same pointer the caller passed in */
 }
 
-static void t_opcode_xpd(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3], int u_temp_i)
+static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3],
+                                     int *u_temp_i)
 {
        /* mul r0, r1.yzxw, r2.zxyw
           mad r0, -r2.yzxw, r1.zxyw, r0
-          NOTE: might need MAD_2
         */
 
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, u_temp_i,
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       VSF_OUT_CLASS_TMP);
-
-       o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)),       // y
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
-                                        t_src_class(src[0].File),
-                                        src[0].
-                                        NegateBase ? VSF_FLAG_ALL :
-                                        VSF_FLAG_NONE) | (src[0].
-                                                          RelAddr << 4);
-
-       o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)),       // z
-                                        t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
-                                        t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
-                                        t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
-                                        t_src_class(src[1].File),
-                                        src[1].
-                                        NegateBase ? VSF_FLAG_ALL :
-                                        VSF_FLAG_NONE) | (src[1].
-                                                          RelAddr << 4);
-
-       o_inst->src[2] = ZERO_SRC_1;
-       o_inst++;
-       u_temp_i--;
-
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-
-       o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)),       // y
-                                        t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
-                                        t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
-                                        t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
-                                        t_src_class(src[1].File),
-                                        (!src[1].
-                                         NegateBase) ? VSF_FLAG_ALL :
-                                        VSF_FLAG_NONE) | (src[1].
-                                                          RelAddr << 4);
-
-       o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)),       // z
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
-                                        t_src_class(src[0].File),
-                                        src[0].
-                                        NegateBase ? VSF_FLAG_ALL :
-                                        VSF_FLAG_NONE) | (src[0].
-                                                          RelAddr << 4);
-
-       o_inst->src[2] =
-           MAKE_VSF_SOURCE(u_temp_i + 1, VSF_IN_COMPONENT_X,
-                           VSF_IN_COMPONENT_Y, VSF_IN_COMPONENT_Z,
-                           VSF_IN_COMPONENT_W, VSF_IN_CLASS_TMP,
-                           VSF_FLAG_NONE);
+       inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    *u_temp_i,
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    PVS_DST_REG_TEMPORARY);
+       inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)),      // Y
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),        // Z
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),        // X
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),        // W
+                                 t_src_class(src[0].File),
+                                 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[0].RelAddr << 4);
+       inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)),      // Z
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),        // X
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),        // Y
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),        // W
+                                 t_src_class(src[1].File),
+                                 src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[1].RelAddr << 4);
+       inst[3] = __CONST(1, SWIZZLE_ZERO);
+       inst += 4;
+
+       inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)),      // Y
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),        // Z
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),        // X
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),        // W
+                                 t_src_class(src[1].File),
+                                 (!src[1].
+                                  Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[1].RelAddr << 4);
+       inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)),      // Z
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),        // X
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),        // Y
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),        // W
+                                 t_src_class(src[0].File),
+                                 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[0].RelAddr << 4);
+       inst[3] =
+           PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
+                           PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
+                           PVS_SRC_REG_TEMPORARY, VSF_FLAG_NONE);
 
-}
+       (*u_temp_i)--;
 
-static void t_opcode_rcc(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
-{
-       fprintf(stderr, "Dont know how to handle op %d yet\n",
-               vpi->Opcode);
-       _mesa_exit(-1);
+       return inst;
 }
 
 static void t_inputs_outputs(struct r300_vertex_program *vp)
 {
        int i;
-       int cur_reg = 0;
+       int cur_reg;
 
-       for (i = 0; i < VERT_ATTRIB_MAX; i++)
-               vp->inputs[i] = -1;
+       cur_reg = -1;
+       for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+               if (vp->key.InputsRead & (1 << i))
+                       vp->inputs[i] = ++cur_reg;
+               else
+                       vp->inputs[i] = -1;
+       }
 
+       cur_reg = 0;
        for (i = 0; i < VERT_RESULT_MAX; i++)
                vp->outputs[i] = -1;
 
        assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
 
-       /* Assign outputs */
        if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) {
                vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
        }
 
+       if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
+               vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
+       }
+
+       /* If we're writing back facing colors we need to send
+        * four colors to make front/back face colors selection work.
+        * If the vertex program doesn't write all 4 colors, lets
+        * pretend it does by skipping output index reg so the colors
+        * get written into appropriate output vectors.
+        */
        if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) {
                vp->outputs[VERT_RESULT_COL0] = cur_reg++;
+       } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+               vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+               cur_reg++;
        }
 
        if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) {
-               vp->outputs[VERT_RESULT_COL1] =
-                   vp->outputs[VERT_RESULT_COL0] + 1;
-               cur_reg = vp->outputs[VERT_RESULT_COL1] + 1;
+               vp->outputs[VERT_RESULT_COL1] = cur_reg++;
+       } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+               vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+               cur_reg++;
        }
 
        if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
-               vp->outputs[VERT_RESULT_BFC0] =
-                   vp->outputs[VERT_RESULT_COL0] + 2;
-               cur_reg = vp->outputs[VERT_RESULT_BFC0] + 1;
+               vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
+       } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+               cur_reg++;
        }
 
        if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
-               vp->outputs[VERT_RESULT_BFC1] =
-                   vp->outputs[VERT_RESULT_COL0] + 3;
-               cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1;
-       }
-#if 0
-       if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) {
-               vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
+               vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
+       } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+               cur_reg++;
        }
 
-       if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
-               vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
-       }
-#endif
-
        for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
                if (vp->key.OutputsWritten & (1 << i)) {
                        vp->outputs[i] = cur_reg++;
                }
        }
+
+       if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) {
+               vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
+       }
 }
 
 static void r300TranslateVertexShader(struct r300_vertex_program *vp,
                                      struct prog_instruction *vpi)
 {
        int i;
-       struct r300_vertprog_instruction *o_inst;
+       GLuint *inst;
        unsigned long num_operands;
-       int are_srcs_scalar;
        /* Initial value should be last tmp reg that hw supports.
           Strangely enough r300 doesnt mind even though these would be out of range.
           Smart enough to realize that it doesnt need it? */
@@ -932,15 +1012,14 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp,
        struct prog_src_register src[3];
 
        vp->pos_end = 0;        /* Not supported yet */
-       vp->program.length = 0;
-       /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */
+       vp->hw_code.length = 0;
        vp->translated = GL_TRUE;
-       vp->native = GL_TRUE;
+       vp->error = GL_FALSE;
 
        t_inputs_outputs(vp);
 
-       o_inst = vp->program.body.i;
-       for (; vpi->Opcode != OPCODE_END; vpi++, o_inst++) {
+       for (inst = vp->hw_code.body.d; vpi->Opcode != OPCODE_END;
+            vpi++, inst += 4) {
 
                FREE_TEMPS();
 
@@ -950,8 +1029,7 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp,
                        vpi->DstReg.Index = u_temp_i;
                }
 
-               num_operands = op_operands(vpi->Opcode) & OP_MASK;
-               are_srcs_scalar = op_operands(vpi->Opcode) & SCALAR_FLAG;
+               num_operands = _mesa_num_inst_src_regs(vpi->Opcode);
 
                /* copy the sources (src) from mesa into a local variable... is this needed? */
                for (i = 0; i < num_operands; i++) {
@@ -961,24 +1039,25 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp,
                if (num_operands == 3) {        /* TODO: scalars */
                        if (CMP_SRCS(src[1], src[2])
                            || CMP_SRCS(src[0], src[2])) {
-                               o_inst->opcode =
-                                   MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
-                                               u_temp_i, VSF_FLAG_ALL,
-                                               VSF_OUT_CLASS_TMP);
-
-                               o_inst->src[0] =
-                                   MAKE_VSF_SOURCE(t_src_index
-                                                   (vp, &src[2]),
-                                                   SWIZZLE_X, SWIZZLE_Y,
-                                                   SWIZZLE_Z, SWIZZLE_W,
-                                                   t_src_class(src[2].
-                                                               File),
-                                                   VSF_FLAG_NONE) |
-                                   (src[2].RelAddr << 4);
-
-                               o_inst->src[1] = ZERO_SRC_2;
-                               o_inst->src[2] = ZERO_SRC_2;
-                               o_inst++;
+                               inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+                                                            GL_FALSE,
+                                                            GL_FALSE,
+                                                            u_temp_i,
+                                                            VSF_FLAG_ALL,
+                                                            PVS_DST_REG_TEMPORARY);
+                               inst[1] =
+                                   PVS_SRC_OPERAND(t_src_index(vp, &src[2]),
+                                                   SWIZZLE_X,
+                                                   SWIZZLE_Y,
+                                                   SWIZZLE_Z,
+                                                   SWIZZLE_W,
+                                                   t_src_class(src[2].File),
+                                                   VSF_FLAG_NONE) | (src[2].
+                                                                     RelAddr <<
+                                                                     4);
+                               inst[2] = __CONST(2, SWIZZLE_ZERO);
+                               inst[3] = __CONST(2, SWIZZLE_ZERO);
+                               inst += 4;
 
                                src[2].File = PROGRAM_TEMPORARY;
                                src[2].Index = u_temp_i;
@@ -989,24 +1068,25 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp,
 
                if (num_operands >= 2) {
                        if (CMP_SRCS(src[1], src[0])) {
-                               o_inst->opcode =
-                                   MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
-                                               u_temp_i, VSF_FLAG_ALL,
-                                               VSF_OUT_CLASS_TMP);
-
-                               o_inst->src[0] =
-                                   MAKE_VSF_SOURCE(t_src_index
-                                                   (vp, &src[0]),
-                                                   SWIZZLE_X, SWIZZLE_Y,
-                                                   SWIZZLE_Z, SWIZZLE_W,
-                                                   t_src_class(src[0].
-                                                               File),
-                                                   VSF_FLAG_NONE) |
-                                   (src[0].RelAddr << 4);
-
-                               o_inst->src[1] = ZERO_SRC_0;
-                               o_inst->src[2] = ZERO_SRC_0;
-                               o_inst++;
+                               inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+                                                            GL_FALSE,
+                                                            GL_FALSE,
+                                                            u_temp_i,
+                                                            VSF_FLAG_ALL,
+                                                            PVS_DST_REG_TEMPORARY);
+                               inst[1] =
+                                   PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
+                                                   SWIZZLE_X,
+                                                   SWIZZLE_Y,
+                                                   SWIZZLE_Z,
+                                                   SWIZZLE_W,
+                                                   t_src_class(src[0].File),
+                                                   VSF_FLAG_NONE) | (src[0].
+                                                                     RelAddr <<
+                                                                     4);
+                               inst[2] = __CONST(0, SWIZZLE_ZERO);
+                               inst[3] = __CONST(0, SWIZZLE_ZERO);
+                               inst += 4;
 
                                src[0].File = PROGRAM_TEMPORARY;
                                src[0].Index = u_temp_i;
@@ -1016,137 +1096,111 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp,
                }
 
                switch (vpi->Opcode) {
-               case OPCODE_POW:
-                       t_opcode_pow(vp, vpi, o_inst, src);
+               case OPCODE_ABS:
+                       inst = r300TranslateOpcodeABS(vp, vpi, inst, src);
                        break;
-               case OPCODE_MOV:
-                       t_opcode_mov(vp, vpi, o_inst, src);
+               case OPCODE_ADD:
+                       inst = r300TranslateOpcodeADD(vp, vpi, inst, src);
                        break;
-               case OPCODE_SWZ:
-                       t_opcode_mov(vp, vpi, o_inst, src);
+               case OPCODE_ARL:
+                       inst = r300TranslateOpcodeARL(vp, vpi, inst, src);
                        break;
-               case OPCODE_ADD:
-                       t_opcode_add(vp, vpi, o_inst, src);
+               case OPCODE_DP3:
+                       inst = r300TranslateOpcodeDP3(vp, vpi, inst, src);
                        break;
-               case OPCODE_MAD:
-                       t_opcode_mad(vp, vpi, o_inst, src);
+               case OPCODE_DP4:
+                       inst = r300TranslateOpcodeDP4(vp, vpi, inst, src);
                        break;
-               case OPCODE_MUL:
-                       t_opcode_mul(vp, vpi, o_inst, src);
+               case OPCODE_DPH:
+                       inst = r300TranslateOpcodeDPH(vp, vpi, inst, src);
                        break;
-               case OPCODE_DP3:
-                       t_opcode_dp3(vp, vpi, o_inst, src);
+               case OPCODE_DST:
+                       inst = r300TranslateOpcodeDST(vp, vpi, inst, src);
                        break;
-               case OPCODE_SUB:
-                       t_opcode_sub(vp, vpi, o_inst, src);
+               case OPCODE_EX2:
+                       inst = r300TranslateOpcodeEX2(vp, vpi, inst, src);
                        break;
-               case OPCODE_ABS:
-                       t_opcode_abs(vp, vpi, o_inst, src);
+               case OPCODE_EXP:
+                       inst = r300TranslateOpcodeEXP(vp, vpi, inst, src);
                        break;
                case OPCODE_FLR:
-                       t_opcode_flr(vp, vpi, o_inst, src,      /* FIXME */
-                                    u_temp_i);
+                       inst = r300TranslateOpcodeFLR(vp, vpi, inst, src,       /* FIXME */
+                                                     &u_temp_i);
+                       break;
+               case OPCODE_FRC:
+                       inst = r300TranslateOpcodeFRC(vp, vpi, inst, src);
                        break;
                case OPCODE_LG2:
-                       t_opcode_lg2(vp, vpi, o_inst, src);
+                       inst = r300TranslateOpcodeLG2(vp, vpi, inst, src);
                        break;
                case OPCODE_LIT:
-                       t_opcode_lit(vp, vpi, o_inst, src);
+                       inst = r300TranslateOpcodeLIT(vp, vpi, inst, src);
                        break;
-               case OPCODE_DPH:
-                       t_opcode_dph(vp, vpi, o_inst, src);
+               case OPCODE_LOG:
+                       inst = r300TranslateOpcodeLOG(vp, vpi, inst, src);
                        break;
-               case OPCODE_XPD:
-                       t_opcode_xpd(vp, vpi, o_inst, src,      /* FIXME */
-                                    u_temp_i);
+               case OPCODE_MAD:
+                       inst = r300TranslateOpcodeMAD(vp, vpi, inst, src);
                        break;
-               case OPCODE_RCC:
-                       t_opcode_rcc(vp, vpi, o_inst, src);
+               case OPCODE_MAX:
+                       inst = r300TranslateOpcodeMAX(vp, vpi, inst, src);
                        break;
-               case OPCODE_END:
+               case OPCODE_MIN:
+                       inst = r300TranslateOpcodeMIN(vp, vpi, inst, src);
+                       break;
+               case OPCODE_MOV:
+                       inst = r300TranslateOpcodeMOV(vp, vpi, inst, src);
+                       break;
+               case OPCODE_MUL:
+                       inst = r300TranslateOpcodeMUL(vp, vpi, inst, src);
+                       break;
+               case OPCODE_POW:
+                       inst = r300TranslateOpcodePOW(vp, vpi, inst, src);
+                       break;
+               case OPCODE_RCP:
+                       inst = r300TranslateOpcodeRCP(vp, vpi, inst, src);
+                       break;
+               case OPCODE_RSQ:
+                       inst = r300TranslateOpcodeRSQ(vp, vpi, inst, src);
+                       break;
+               case OPCODE_SGE:
+                       inst = r300TranslateOpcodeSGE(vp, vpi, inst, src);
+                       break;
+               case OPCODE_SLT:
+                       inst = r300TranslateOpcodeSLT(vp, vpi, inst, src);
+                       break;
+               case OPCODE_SUB:
+                       inst = r300TranslateOpcodeSUB(vp, vpi, inst, src);
+                       break;
+               case OPCODE_SWZ:
+                       inst = r300TranslateOpcodeSWZ(vp, vpi, inst, src);
+                       break;
+               case OPCODE_XPD:
+                       inst = r300TranslateOpcodeXPD(vp, vpi, inst, src,       /* FIXME */
+                                                     &u_temp_i);
                        break;
-
-                       /* all other opcodes */
                default:
-                       o_inst->opcode =
-                           MAKE_VSF_OP(t_opcode(vpi->Opcode),
-                                       t_dst_index(vp, &vpi->DstReg),
-                                       t_dst_mask(vpi->DstReg.WriteMask),
-                                       t_dst_class(vpi->DstReg.File));
-
-                       switch (num_operands) {
-                       case 1:
-                               if (are_srcs_scalar) {
-                                       o_inst->src[0] =
-                                           t_src_scalar(vp, &src[0]);
-                               } else {
-                                       o_inst->src[0] =
-                                           t_src(vp, &src[0]);
-                               }
-                               o_inst->src[1] = ZERO_SRC_0;
-                               o_inst->src[2] = ZERO_SRC_0;
-                               break;
-                       case 2:
-                               if (are_srcs_scalar) {
-                                       o_inst->src[0] =
-                                           t_src_scalar(vp, &src[0]);
-                                       o_inst->src[1] =
-                                           t_src_scalar(vp, &src[1]);
-                               } else {
-                                       o_inst->src[0] =
-                                           t_src(vp, &src[0]);
-                                       o_inst->src[1] =
-                                           t_src(vp, &src[1]);
-                               }
-                               o_inst->src[2] = ZERO_SRC_1;
-                               break;
-                       case 3:
-                               if (are_srcs_scalar) {
-                                       o_inst->src[0] =
-                                           t_src_scalar(vp, &src[0]);
-                                       o_inst->src[1] =
-                                           t_src_scalar(vp, &src[1]);
-                                       o_inst->src[2] =
-                                           t_src_scalar(vp, &src[2]);
-                               } else {
-                                       o_inst->src[0] =
-                                           t_src(vp, &src[0]);
-                                       o_inst->src[1] =
-                                           t_src(vp, &src[1]);
-                                       o_inst->src[2] =
-                                           t_src(vp, &src[2]);
-                               }
-                               break;
-                       default:
-                               assert(0);
-                               break;
-                       }
-
+                       vp->error = GL_TRUE;
                        break;
                }
        }
 
-       vp->program.length = (o_inst - vp->program.body.i) * 4;
-       if (vp->program.length >= VSF_MAX_FRAGMENT_LENGTH) {
-               vp->program.length = 0;
-               vp->native = GL_FALSE;
+       vp->hw_code.length = (inst - vp->hw_code.body.d);
+       if (vp->hw_code.length >= VSF_MAX_FRAGMENT_LENGTH) {
+               vp->error = GL_TRUE;
        }
-
-#if 0
-       fprintf(stderr, "hw program:\n");
-       for (i = 0; i < vp->program.length; i++)
-               fprintf(stderr, "%08x\n", vp->program.body.d[i]);
-#endif
 }
 
+/* DP4 version seems to trigger some hw peculiarity */
+//#define PREFER_DP4
+
 static void position_invariant(struct gl_program *prog)
 {
        struct prog_instruction *vpi;
        struct gl_program_parameter_list *paramList;
        int i;
 
-       gl_state_index tokens[STATE_LENGTH] =
-           { STATE_MVP_MATRIX, 0, 0, 0, 0 };
+       gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 };
 
        /* tokens[4] = matrix modifier */
 #ifdef PREFER_DP4
@@ -1186,7 +1240,6 @@ static void position_invariant(struct gl_program *prog)
                else
                        vpi[i].Opcode = OPCODE_MAD;
 
-               vpi[i].StringPos = 0;
                vpi[i].Data = 0;
 
                if (i == 3)
@@ -1226,8 +1279,8 @@ static void position_invariant(struct gl_program *prog)
        assert(vpi->Opcode == OPCODE_END);
 }
 
-static void insert_wpos(struct r300_vertex_program *vp,
-                       struct gl_program *prog, GLuint temp_index)
+static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog,
+                       GLuint temp_index)
 {
        struct prog_instruction *vpi;
        struct prog_instruction *vpi_insert;
@@ -1240,8 +1293,8 @@ static void insert_wpos(struct r300_vertex_program *vp,
                                prog->NumInstructions - 1);
        /* END */
        _mesa_copy_instructions(&vpi[prog->NumInstructions + 1],
-                               &prog->Instructions[prog->NumInstructions -
-                                                   1], 1);
+                               &prog->Instructions[prog->NumInstructions - 1],
+                               1);
        vpi_insert = &vpi[prog->NumInstructions - 1];
 
        vpi_insert[i].Opcode = OPCODE_MOV;
@@ -1314,6 +1367,49 @@ static struct r300_vertex_program *build_program(struct r300_vertex_program_key
                pos_as_texcoord(vp, &mesa_vp->Base);
        }
 
+       if (RADEON_DEBUG & DEBUG_VERTS) {
+               fprintf(stderr, "Vertex program after native rewrite:\n");
+               _mesa_print_program(&mesa_vp->Base);
+               fflush(stdout);
+       }
+
+       /* Some outputs may be artificially added, to match the inputs of the fragment program.
+        * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
+        * vertex program are undefined, so just use MOV [vertex_result], CONST[0]
+        */
+       {
+               int i, count = 0;
+               for (i = 0; i < VERT_RESULT_MAX; ++i) {
+                       if (vp->key.OutputsAdded & (1 << i)) {
+                               ++count;
+                       }
+               }
+
+               if (count > 0) {
+                       struct prog_instruction *inst;
+
+                       _mesa_insert_instructions(&mesa_vp->Base, mesa_vp->Base.NumInstructions - 1, count);
+                       inst = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions - 1 - count];
+
+                       for (i = 0; i < VERT_RESULT_MAX; ++i) {
+                               if (vp->key.OutputsAdded & (1 << i)) {
+                                       inst->Opcode = OPCODE_MOV;
+
+                                       inst->DstReg.File = PROGRAM_OUTPUT;
+                                       inst->DstReg.Index = i;
+                                       inst->DstReg.WriteMask = WRITEMASK_XYZW;
+                                       inst->DstReg.CondMask = COND_TR;
+
+                                       inst->SrcReg[0].File = PROGRAM_CONSTANT;
+                                       inst->SrcReg[0].Index = 0;
+                                       inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+                                       ++inst;
+                               }
+                       }
+               }
+       }
+
        assert(mesa_vp->Base.NumInstructions);
        vp->num_temporaries = mesa_vp->Base.NumTemporaries;
        r300TranslateVertexShader(vp, mesa_vp->Base.Instructions);
@@ -1321,6 +1417,15 @@ static struct r300_vertex_program *build_program(struct r300_vertex_program_key
        return vp;
 }
 
+static void add_outputs(struct r300_vertex_program_key *key, GLint vert)
+{
+       if (key->OutputsWritten & (1 << vert))
+               return;
+
+       key->OutputsWritten |= 1 << vert;
+       key->OutputsAdded |= 1 << vert;
+}
+
 void r300SelectVertexShader(r300ContextPtr r300)
 {
        GLcontext *ctx = ctx = r300->radeon.glCtx;
@@ -1331,8 +1436,9 @@ void r300SelectVertexShader(r300ContextPtr r300)
        struct r300_vertex_program *vp;
        GLint wpos_idx;
 
-       vpc =
-           (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
+       vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
+       wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
+       wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
        InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
 
        wpos_idx = -1;
@@ -1346,14 +1452,33 @@ void r300SelectVertexShader(r300ContextPtr r300)
                        _mesa_exit(-1);
                }
 
-               InputsRead |= (FRAG_BIT_TEX0 << i);
+               wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
                wpos_idx = i;
        }
-       wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
-       wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
+
        if (vpc->mesa_program.IsPositionInvariant) {
-               /* we wan't position don't we ? */
                wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
+               wanted_key.OutputsWritten |= (1 << VERT_RESULT_HPOS);
+       } else {
+               add_outputs(&wanted_key, VERT_RESULT_HPOS);
+       }
+
+       if (InputsRead & FRAG_BIT_COL0) {
+               add_outputs(&wanted_key, VERT_RESULT_COL0);
+       }
+
+       if (InputsRead & FRAG_BIT_COL1) {
+               add_outputs(&wanted_key, VERT_RESULT_COL1);
+       }
+
+       if (InputsRead & FRAG_BIT_FOGC) {
+               add_outputs(&wanted_key, VERT_RESULT_FOGC);
+       }
+
+       for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+               if (InputsRead & (FRAG_BIT_TEX0 << i)) {
+                       add_outputs(&wanted_key, VERT_RESULT_TEX0 + i);
+               }
        }
 
        for (vp = vpc->progs; vp; vp = vp->next)
@@ -1362,7 +1487,12 @@ void r300SelectVertexShader(r300ContextPtr r300)
                        r300->selected_vp = vp;
                        return;
                }
-       //_mesa_print_program(&vpc->mesa_program.Base);
+
+       if (RADEON_DEBUG & DEBUG_VERTS) {
+               fprintf(stderr, "Initial vertex program:\n");
+               _mesa_print_program(&vpc->mesa_program.Base);
+               fflush(stdout);
+       }
 
        vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx);
        vp->next = vpc->progs;
@@ -1370,4 +1500,74 @@ void r300SelectVertexShader(r300ContextPtr r300)
        r300->selected_vp = vp;
 }
 
-/* vim: set foldenable foldmethod=marker : */
+/*
+ * Raise the vpu.count field of the command header found at ptr to
+ * new_count / 4 when that is larger than the value already stored.
+ * The stored count is never lowered here, and the new value is asserted
+ * to be below 256.
+ */
+#define bump_vpu_count(ptr, new_count) \
+       do { \
+               drm_r300_cmd_header_t *_hdr = (drm_r300_cmd_header_t *)(ptr); \
+               int _quads = (new_count) / 4; \
+               assert(_quads < 256); \
+               if (_hdr->vpu.count < _quads) { \
+                       _hdr->vpu.count = _quads; \
+               } \
+       } while (0)
+
+/*
+ * Copy the dwords of a compiled vertex shader into one of the VPU state
+ * atoms and grow that atom's vpu.count to cover them.
+ *
+ * Bits 8..11 of dest select the atom (0: vpi, 2: vpp, 4: vps).  The low
+ * byte of dest is a slot index; it is multiplied by four to obtain the
+ * dword offset inside the selected atom.  Any other selector is reported
+ * on stderr and followed by _mesa_exit(-1).
+ *
+ * code->length must be positive and a multiple of four.
+ */
+static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_shader_hw_code *code)
+{
+       const int bank = (dest >> 8) & 0xf;
+       const int first = 4 * (dest & 0xff);
+       int n;
+
+       assert((code->length > 0) && (code->length % 4 == 0));
+
+       if (bank == 0) {
+               R300_STATECHANGE(r300, vpi);
+               for (n = 0; n < code->length; n++) {
+                       r300->hw.vpi.cmd[R300_VPI_INSTR_0 + n + first] = code->body.d[n];
+               }
+               bump_vpu_count(r300->hw.vpi.cmd, code->length + first);
+       } else if (bank == 2) {
+               R300_STATECHANGE(r300, vpp);
+               for (n = 0; n < code->length; n++) {
+                       r300->hw.vpp.cmd[R300_VPP_PARAM_0 + n + first] = code->body.d[n];
+               }
+               bump_vpu_count(r300->hw.vpp.cmd, code->length + first);
+       } else if (bank == 4) {
+               R300_STATECHANGE(r300, vps);
+               for (n = 0; n < code->length; n++) {
+                       r300->hw.vps.cmd[1 + n + first] = code->body.d[n];
+               }
+               bump_vpu_count(r300->hw.vps.cmd, code->length + first);
+       } else {
+               fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest);
+               _mesa_exit(-1);
+       }
+}
+
+/*
+ * Load the currently selected vertex program (rmesa->selected_vp) into the
+ * hardware state atoms: refresh the parameter block, emit the shader code,
+ * update VAP control and fill in the three PVS_CNTL words.
+ */
+void r300SetupVertexProgram(r300ContextPtr rmesa)
+{
+       GLcontext *ctx = rmesa->radeon.glCtx;
+       struct r300_vertex_program *prog = rmesa->selected_vp;
+       int last_inst;
+       int num_params;
+
+       /* Zero the counts of all three VPU atoms in case one ends up unused. */
+       ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
+       ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
+       ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
+
+       R300_STATECHANGE(rmesa, vpp);
+       num_params = r300VertexProgUpdateParams(ctx,
+               (struct r300_vertex_program_cont *) ctx->VertexProgram._Current,
+               (float *) &rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
+       /* bump_vpu_count() divides by four itself, so pass the raw value first. */
+       bump_vpu_count(rmesa->hw.vpp.cmd, num_params);
+       num_params /= 4;
+
+       r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &prog->hw_code);
+       /* Index of the last instruction, counting four dwords per instruction. */
+       last_inst = (prog->hw_code.length / 4) - 1;
+
+       r300VapCntl(rmesa,
+                   _mesa_bitcount(prog->key.InputsRead),
+                   _mesa_bitcount(prog->key.OutputsWritten),
+                   prog->num_temporaries);
+
+       R300_STATECHANGE(rmesa, pvs);
+       rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] =
+               (0 << R300_PVS_FIRST_INST_SHIFT) |
+               (last_inst << R300_PVS_XYZW_VALID_INST_SHIFT) |
+               (last_inst << R300_PVS_LAST_INST_SHIFT);
+
+       rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] =
+               (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
+               (num_params << R300_PVS_MAX_CONST_ADDR_SHIFT);
+
+       rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] =
+               (last_inst << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
+}