r5xx: Dump shader constants when dumping program assembly.
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
index da2c6dbcd5bb4bd9404af96461c90e4d4ac204a3..861f0427cf5dcd56cc25b34e7ae2c3a28a970eb3 100644 (file)
@@ -1,6 +1,7 @@
 /**************************************************************************
 
-Copyright (C) 2005 Aapo Tahkola.
+Copyright (C) 2005  Aapo Tahkola <aet@rasterburn.org>
+Copyright (C) 2008  Oliver McFadden <z3ro.geek@gmail.com>
 
 All Rights Reserved.
 
@@ -25,16 +26,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 **************************************************************************/
 
-/**
- * \file
- *
- * \author Aapo Tahkola <aet@rasterburn.org>
- */
+/* Radeon R5xx Acceleration, Revision 1.2 */
 
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
-#include "program.h"
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
 #include "shader/prog_instruction.h"
 #include "shader/prog_parameter.h"
 #include "shader/prog_statevars.h"
@@ -42,117 +39,38 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #include "r300_context.h"
 
-#if SWIZZLE_X != VSF_IN_COMPONENT_X || \
-    SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
-    SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
-    SWIZZLE_W != VSF_IN_COMPONENT_W || \
-    SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
-    SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
-    WRITEMASK_X != VSF_FLAG_X || \
-    WRITEMASK_Y != VSF_FLAG_Y || \
-    WRITEMASK_Z != VSF_FLAG_Z || \
-    WRITEMASK_W != VSF_FLAG_W
-#error Cannot change these!
-#endif
-
 /* TODO: Get rid of t_src_class call */
 #define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \
-                      ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \
-                        t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \
-                       (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \
-                        t_src_class(b.File) == VSF_IN_CLASS_ATTR)))) \
-
-#define ZERO_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4))
-
-#define ZERO_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4))
-
-#define ZERO_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4))
-
-#define ONE_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4))
-
-#define ONE_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4))
-
-#define ONE_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4))
+                      ((t_src_class(a.File) == PVS_SRC_REG_CONSTANT && \
+                        t_src_class(b.File) == PVS_SRC_REG_CONSTANT) || \
+                       (t_src_class(a.File) == PVS_SRC_REG_INPUT && \
+                        t_src_class(b.File) == PVS_SRC_REG_INPUT)))) \
 
-/* DP4 version seems to trigger some hw peculiarity */
-//#define PREFER_DP4
+/* Reuse the register of already-valid source src[x] (index, class, RelAddr)
+ * with swizzle y on all four components and no negate flags, giving a constant
+ * ZERO or ONE source; expands the names `vp` and `src` from the calling scope.
+ * NOTE(review): `__`-prefixed identifiers are reserved in C; consider renaming. */
+#define __CONST(x, y)  \
+       (PVS_SRC_OPERAND(t_src_index(vp, &src[x]),      \
+                          t_swizzle(y),        \
+                          t_swizzle(y),        \
+                          t_swizzle(y),        \
+                          t_swizzle(y),        \
+                          t_src_class(src[x].File), \
+                          VSF_FLAG_NONE) | (src[x].RelAddr << 4))
 
 #define FREE_TEMPS() \
        do { \
-               if(u_temp_i < vp->num_temporaries) { \
-                       WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \
+               int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; /* distance of u_temp_i below its reset value, i.e. scratch temps taken since the last reset */ \
+               if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { /* program temps plus scratch temps exceed the limit */ \
+                       WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \
                        vp->native = GL_FALSE; \
                } \
                u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
        } while (0)
 
-#define SCALAR_FLAG (1<<31)
-#define FLAG_MASK (1<<31)
-#define OP_MASK        (0xf)           /* we are unlikely to have more than 15 */
-#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
-
-static struct {
-       char *name;
-       int opcode;
-       unsigned long ip;       /* number of input operands and flags */
-} op_names[] = {
-       /* *INDENT-OFF* */
-       OPN(ABS, 1),
-       OPN(ADD, 2),
-       OPN(ARL, 1 | SCALAR_FLAG),
-       OPN(DP3, 2),
-       OPN(DP4, 2),
-       OPN(DPH, 2),
-       OPN(DST, 2),
-       OPN(EX2, 1 | SCALAR_FLAG),
-       OPN(EXP, 1 | SCALAR_FLAG),
-       OPN(FLR, 1),
-       OPN(FRC, 1),
-       OPN(LG2, 1 | SCALAR_FLAG),
-       OPN(LIT, 1),
-       OPN(LOG, 1 | SCALAR_FLAG),
-       OPN(MAD, 3),
-       OPN(MAX, 2),
-       OPN(MIN, 2),
-       OPN(MOV, 1),
-       OPN(MUL, 2),
-       OPN(POW, 2 | SCALAR_FLAG),
-       OPN(RCP, 1 | SCALAR_FLAG),
-       OPN(RSQ, 1 | SCALAR_FLAG),
-       OPN(SGE, 2),
-       OPN(SLT, 2),
-       OPN(SUB, 2),
-       OPN(SWZ, 1),
-       OPN(XPD, 2),
-       OPN(RCC, 0),    //extra
-       OPN(PRINT, 0),
-       OPN(END, 0)
-       /* *INDENT-ON* */
-};
-
-#undef OPN
-
 int r300VertexProgUpdateParams(GLcontext * ctx,
-                              struct r300_vertex_program_cont *vp,
-                              float *dst)
+                              struct r300_vertex_program_cont *vp, float *dst)
 {
        int pi;
        struct gl_vertex_program *mesa_vp = &vp->mesa_program;
@@ -183,7 +101,6 @@ int r300VertexProgUpdateParams(GLcontext * ctx,
        paramList = mesa_vp->Base.Parameters;
        for (pi = 0; pi < paramList->NumParameters; pi++) {
                switch (paramList->Parameters[pi].Type) {
-
                case PROGRAM_STATE_VAR:
                case PROGRAM_NAMED_PARAM:
                        //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
@@ -193,7 +110,6 @@ int r300VertexProgUpdateParams(GLcontext * ctx,
                        *dst++ = paramList->ParameterValues[pi][2];
                        *dst++ = paramList->ParameterValues[pi][3];
                        break;
-
                default:
                        _mesa_problem(NULL, "Bad param type in %s",
                                      __FUNCTION__);
@@ -215,11 +131,11 @@ static unsigned long t_dst_class(enum register_file file)
 
        switch (file) {
        case PROGRAM_TEMPORARY:
-               return VSF_OUT_CLASS_TMP;
+               return PVS_DST_REG_TEMPORARY;
        case PROGRAM_OUTPUT:
-               return VSF_OUT_CLASS_RESULT;
+               return PVS_DST_REG_OUT;
        case PROGRAM_ADDRESS:
-               return VSF_OUT_CLASS_ADDR;
+               return PVS_DST_REG_A0;
                /*
                   case PROGRAM_INPUT:
                   case PROGRAM_LOCAL_PARAM:
@@ -247,19 +163,17 @@ static unsigned long t_dst_index(struct r300_vertex_program *vp,
 
 static unsigned long t_src_class(enum register_file file)
 {
-
        switch (file) {
        case PROGRAM_TEMPORARY:
-               return VSF_IN_CLASS_TMP;
-
+               return PVS_SRC_REG_TEMPORARY;
        case PROGRAM_INPUT:
-               return VSF_IN_CLASS_ATTR;
-
+               return PVS_SRC_REG_INPUT;
        case PROGRAM_LOCAL_PARAM:
        case PROGRAM_ENV_PARAM:
        case PROGRAM_NAMED_PARAM:
+       case PROGRAM_CONSTANT:
        case PROGRAM_STATE_VAR:
-               return VSF_IN_CLASS_PARAM;
+               return PVS_SRC_REG_CONSTANT;
                /*
                   case PROGRAM_OUTPUT:
                   case PROGRAM_WRITE_ONLY:
@@ -284,8 +198,8 @@ static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
        int i;
 
        if (vp == NULL) {
-               fprintf(stderr, "vp null in call to %s from %s\n",
-                       __FUNCTION__, caller);
+               fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__,
+                       caller);
                return;
        }
 
@@ -334,7 +248,7 @@ static unsigned long t_src(struct r300_vertex_program *vp,
        /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
         * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
         */
-       return MAKE_VSF_SOURCE(t_src_index(vp, src),
+       return PVS_SRC_OPERAND(t_src_index(vp, src),
                               t_swizzle(GET_SWZ(src->Swizzle, 0)),
                               t_swizzle(GET_SWZ(src->Swizzle, 1)),
                               t_swizzle(GET_SWZ(src->Swizzle, 2)),
@@ -349,7 +263,7 @@ static unsigned long t_src_scalar(struct r300_vertex_program *vp,
        /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
         * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
         */
-       return MAKE_VSF_SOURCE(t_src_index(vp, src),
+       return PVS_SRC_OPERAND(t_src_index(vp, src),
                               t_swizzle(GET_SWZ(src->Swizzle, 0)),
                               t_swizzle(GET_SWZ(src->Swizzle, 0)),
                               t_swizzle(GET_SWZ(src->Swizzle, 0)),
@@ -360,50 +274,6 @@ static unsigned long t_src_scalar(struct r300_vertex_program *vp,
            (src->RelAddr << 4);
 }
 
-static unsigned long t_opcode(enum prog_opcode opcode)
-{
-
-       switch (opcode) {
-       /* *INDENT-OFF* */
-       case OPCODE_ARL: return R300_VPI_OUT_OP_ARL;
-       case OPCODE_DST: return R300_VPI_OUT_OP_DST;
-       case OPCODE_EX2: return R300_VPI_OUT_OP_EX2;
-       case OPCODE_EXP: return R300_VPI_OUT_OP_EXP;
-       case OPCODE_FRC: return R300_VPI_OUT_OP_FRC;
-       case OPCODE_LG2: return R300_VPI_OUT_OP_LG2;
-       case OPCODE_LOG: return R300_VPI_OUT_OP_LOG;
-       case OPCODE_MAX: return R300_VPI_OUT_OP_MAX;
-       case OPCODE_MIN: return R300_VPI_OUT_OP_MIN;
-       case OPCODE_MUL: return R300_VPI_OUT_OP_MUL;
-       case OPCODE_RCP: return R300_VPI_OUT_OP_RCP;
-       case OPCODE_RSQ: return R300_VPI_OUT_OP_RSQ;
-       case OPCODE_SGE: return R300_VPI_OUT_OP_SGE;
-       case OPCODE_SLT: return R300_VPI_OUT_OP_SLT;
-       case OPCODE_DP4: return R300_VPI_OUT_OP_DOT;
-       /* *INDENT-ON* */
-
-       default:
-               fprintf(stderr, "%s: Should not be called with opcode %d!",
-                       __FUNCTION__, opcode);
-       }
-       _mesa_exit(-1);
-       return 0;
-}
-
-static unsigned long op_operands(enum prog_opcode opcode)
-{
-       int i;
-
-       /* Can we trust mesas opcodes to be in order ? */
-       for (i = 0; i < sizeof(op_names) / sizeof(*op_names); i++)
-               if (op_names[i].opcode == opcode)
-                       return op_names[i].ip;
-
-       fprintf(stderr, "op %d not found in op_names\n", opcode);
-       _mesa_exit(-1);
-       return 0;
-}
-
 static GLboolean valid_dst(struct r300_vertex_program *vp,
                           struct prog_dst_register *dst)
 {
@@ -416,565 +286,669 @@ static GLboolean valid_dst(struct r300_vertex_program *vp,
        return GL_TRUE;
 }
 
-/*
- * Instruction    Inputs  Output   Description
- * -----------    ------  ------   --------------------------------
- * ABS            v       v        absolute value
- * ADD            v,v     v        add
- * ARL            s       a        address register load
- * DP3            v,v     ssss     3-component dot product
- * DP4            v,v     ssss     4-component dot product
- * DPH            v,v     ssss     homogeneous dot product
- * DST            v,v     v        distance vector
- * EX2            s       ssss     exponential base 2
- * EXP            s       v        exponential base 2 (approximate)
- * FLR            v       v        floor
- * FRC            v       v        fraction
- * LG2            s       ssss     logarithm base 2
- * LIT            v       v        compute light coefficients
- * LOG            s       v        logarithm base 2 (approximate)
- * MAD            v,v,v   v        multiply and add
- * MAX            v,v     v        maximum
- * MIN            v,v     v        minimum
- * MOV            v       v        move
- * MUL            v,v     v        multiply
- * POW            s,s     ssss     exponentiate
- * RCP            s       ssss     reciprocal
- * RSQ            s       ssss     reciprocal square root
- * SGE            v,v     v        set on greater than or equal
- * SLT            v,v     v        set on less than
- * SUB            v,v     v        subtract
- * SWZ            v       v        extended swizzle
- * XPD            v,v     v        cross product
- *
- * Table X.5:  Summary of vertex program instructions.  "v" indicates a
- * floating-point vector input or output, "s" indicates a floating-point
- * scalar input, "ssss" indicates a scalar output replicated across a
- * 4-component result vector, and "a" indicates a single address register
- * component.
- */
-
-static void t_opcode_abs(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
        //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
 
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-
-       o_inst->src[0] = t_src(vp, &src[0]);
-       o_inst->src[1] =
-           MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
-                           t_src_class(src[0].File),
-                           (!src[0].
-                            NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+       inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,        /* ABS is emitted as a MAX of the operand and its negation */
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* first MAX input: src[0] as written */
+       inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),     /* second MAX input: same register and swizzle ... */
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
+                                 t_src_class(src[0].File),
+                                 (!src[0].      /* ... with the negate flags inverted */
+                                  NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
            (src[0].RelAddr << 4);
-       o_inst->src[2] = 0;
+       inst[3] = 0;    /* third source word is left as zero */
+
+       return inst;    /* start of the 4-word instruction just written */
 }
 
-static void t_opcode_add(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
-       unsigned long hw_op;
-
-#if 1
-       hw_op = (src[0].File == PROGRAM_TEMPORARY
-                && src[1].File ==
-                PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
-           R300_VPI_OUT_OP_MAD;
-
-       o_inst->opcode =
-           MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       o_inst->src[0] = ONE_SRC_0;
-       o_inst->src[1] = t_src(vp, &src[0]);
-       o_inst->src[2] = t_src(vp, &src[1]);
-#else
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       o_inst->src[0] = t_src(vp, &src[0]);
-       o_inst->src[1] = t_src(vp, &src[1]);
-       o_inst->src[2] = ZERO_SRC_1;
-
-#endif
+       inst[0] = PVS_OP_DST_OPERAND(VE_ADD,    /* ADD is emitted as a single VE_ADD writing vpi->DstReg */
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* first addend */
+       inst[2] = t_src(vp, &src[1]);   /* second addend */
+       inst[3] = __CONST(1, SWIZZLE_ZERO);     /* third word: src[1]'s register with every component swizzled to ZERO */
+
+       return inst;    /* start of the 4-word instruction just written */
 }
 
-/* TODO: ARL */
+static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* ARL (address register load): fills inst[0..3] with one instruction */
+       inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX,     /* opcode chosen for ARL; destination comes from vpi->DstReg */
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* the only real operand */
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* remaining words: src[0]'s register with every component swizzled to ZERO */
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+       return inst;    /* start of the 4-word instruction just written */
+}
 
-static void t_opcode_dp3(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
        //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
 
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-
-       o_inst->src[0] =
-           MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
-                           SWIZZLE_ZERO, t_src_class(src[0].File),
-                           src[0].
-                           NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
+       inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,    /* DP3 is a dot product whose W inputs are forced to zero */
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
+                                 t_swizzle(SWIZZLE_ZERO),       /* W = 0, translated like every other selector */
+                                 t_src_class(src[0].File),
+                                 src[0].        /* only X, Y and Z carry the negate */
+                                 NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
            (src[0].RelAddr << 4);
-
-       o_inst->src[1] =
-           MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
+       inst[2] =
+           PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
                            t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
                            t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
-                           SWIZZLE_ZERO, t_src_class(src[1].File),
+                           t_swizzle(GET_SWZ(src[1].Swizzle, 2)), t_swizzle(SWIZZLE_ZERO),
+                           t_src_class(src[1].File),
                            src[1].
                            NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
            (src[1].RelAddr << 4);
+       inst[3] = __CONST(1, SWIZZLE_ZERO);     /* third word: src[1]'s register with every component swizzled to ZERO */
 
-       o_inst->src[2] = ZERO_SRC_1;
+       return inst;    /* start of the 4-word instruction just written */
 }
 
-/* TODO: DP4 */
+static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* DP4 (4-component dot product): fills inst[0..3] with one instruction */
+       inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,    /* both operands are passed through unmodified */
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = t_src(vp, &src[1]);
+       inst[3] = __CONST(1, SWIZZLE_ZERO);     /* third word: src[1]'s register with every component swizzled to ZERO */
+
+       return inst;    /* start of the 4-word instruction just written */
+}
 
-static void t_opcode_dph(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
        //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-
-       o_inst->src[0] =
-           MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
-                           VSF_IN_COMPONENT_ONE, t_src_class(src[0].File),
-                           src[0].
-                           NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
+       inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,    /* DPH is a dot product with the first operand's W forced to one */
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
+                                 PVS_SRC_SELECT_FORCE_1,        /* W selector replaced by the constant-one selector */
+                                 t_src_class(src[0].File),
+                                 src[0].        /* only X, Y and Z carry the negate */
+                                 NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
            (src[0].RelAddr << 4);
-       o_inst->src[1] = t_src(vp, &src[1]);
-       o_inst->src[2] = ZERO_SRC_1;
+       inst[2] = t_src(vp, &src[1]);   /* second operand is used as written */
+       inst[3] = __CONST(1, SWIZZLE_ZERO);     /* third word: src[1]'s register with every component swizzled to ZERO */
+
+       return inst;    /* start of the 4-word instruction just written */
 }
 
-/* TODO: DST */
+static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* DST (distance vector): fills inst[0..3] with one instruction */
+       inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR,        /* both operands are passed through unmodified */
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = t_src(vp, &src[1]);
+       inst[3] = __CONST(1, SWIZZLE_ZERO);     /* third word: src[1]'s register with every component swizzled to ZERO */
+
+       return inst;    /* start of the 4-word instruction just written */
+}
 
-/* TODO: EX2 */
+static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* EX2 (exponential base 2): fills inst[0..3] with one instruction */
+       inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX,
+                                    GL_TRUE,   /* NOTE(review): GL_TRUE only accompanies ME_* opcodes in this file - presumably the math-engine select; confirm against PVS_OP_DST_OPERAND */
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src_scalar(vp, &src[0]);    /* scalar form of the single operand */
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* remaining words: src[0]'s register with every component swizzled to ZERO */
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+       return inst;    /* start of the 4-word instruction just written */
+}
 
-/* TODO: EXP */
+static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* EXP (approximate exponential base 2): fills inst[0..3] with one instruction */
+       inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX,
+                                    GL_TRUE,   /* NOTE(review): GL_TRUE only accompanies ME_* opcodes in this file - presumably the math-engine select; confirm against PVS_OP_DST_OPERAND */
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src_scalar(vp, &src[0]);    /* scalar form of the single operand */
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* remaining words: src[0]'s register with every component swizzled to ZERO */
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+       return inst;    /* start of the 4-word instruction just written */
+}
 
-static void t_opcode_flr(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3], int *u_temp_i)
+static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3],
+                                     int *u_temp_i)
 {
        /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
           ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
 
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, *u_temp_i,
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       VSF_OUT_CLASS_TMP);
-
-       o_inst->src[0] = t_src(vp, &src[0]);
-       o_inst->src[1] = ZERO_SRC_0;
-       o_inst->src[2] = ZERO_SRC_0;
-       o_inst++;
-
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-
-       o_inst->src[0] = t_src(vp, &src[0]);
-       o_inst->src[1] =
-           MAKE_VSF_SOURCE(*u_temp_i, VSF_IN_COMPONENT_X,
-                           VSF_IN_COMPONENT_Y, VSF_IN_COMPONENT_Z,
-                           VSF_IN_COMPONENT_W, VSF_IN_CLASS_TMP,
-                           /* Not 100% sure about this */
-                           (!src[0].
-                            NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE
-                           /*VSF_FLAG_ALL */ );
-
-       o_inst->src[2] = ZERO_SRC_0;
+       inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,       /* step 1: scratch temp = fraction of the operand */
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    *u_temp_i,
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    PVS_DST_REG_TEMPORARY);
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* remaining words: src[0]'s register with every component swizzled to ZERO */
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+       inst += 4;      /* advance to the second 4-word instruction */
+
+       inst[0] = PVS_OP_DST_OPERAND(VE_ADD,    /* step 2: destination = operand + (-scratch temp) */
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = PVS_SRC_OPERAND(*u_temp_i,
+                                 PVS_SRC_SELECT_X,
+                                 PVS_SRC_SELECT_Y,
+                                 PVS_SRC_SELECT_Z,
+                                 PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY,
+                                 /* floor(s) = s - frac(s) for the operand s as
+                                  * read (source negate already applied by
+                                  * t_src), so the fraction is always negated. */
+                                 VSF_FLAG_ALL);
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
        (*u_temp_i)--;
+
+       return inst;    /* start of the second (last) instruction written */
 }
 
-/* TODO: FRC */
+static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* FRC (fraction): fills inst[0..3] with one instruction */
+       inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,       /* result goes straight to vpi->DstReg */
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* the only real operand */
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* remaining words: src[0]'s register with every component swizzled to ZERO */
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+       return inst;    /* start of the 4-word instruction just written */
+}
 
-static void t_opcode_lg2(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
        // LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
 
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_LG2, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-
-       o_inst->src[0] =
-           MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                           t_src_class(src[0].File),
-                           src[0].
-                           NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+       inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX,      /* LG2: one instruction written to inst[0..3] */
+                                    GL_TRUE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),     /* operand built by hand instead of via t_src() */
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),        /* swizzle component 0 of src[0] ... */
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),        /* ... is repeated ... */
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),        /* ... in all four ... */
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),        /* ... lane selectors */
+                                 t_src_class(src[0].File),
+                                 src[0].
+                                 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |  /* negate every lane when NegateBase is set */
            (src[0].RelAddr << 4);
-       o_inst->src[1] = ZERO_SRC_0;
-       o_inst->src[2] = ZERO_SRC_0;
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* takes the place of the old ZERO_SRC_0 filler */
+       inst[3] = __CONST(0, SWIZZLE_ZERO);     /* takes the place of the old ZERO_SRC_0 filler */
+
+       return inst;    /* returned un-advanced; the loop in r300TranslateVertexShader does inst += 4 */
 }
 
-static void t_opcode_lit(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
        //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
 
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_LIT, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
+       inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX, /* LIT: one instruction written to inst[0..3] */
+                                    GL_TRUE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
        /* NOTE: Users swizzling might not work. */
-       o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)),       // x
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
-                                        VSF_IN_COMPONENT_ZERO, // z
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
-                                        t_src_class(src[0].File),
-                                        src[0].
-                                        NegateBase ? VSF_FLAG_ALL :
-                                        VSF_FLAG_NONE) | (src[0].
-                                                          RelAddr << 4);
-       o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)),       // y
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
-                                        VSF_IN_COMPONENT_ZERO, // z
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
-                                        t_src_class(src[0].File),
-                                        src[0].
-                                        NegateBase ? VSF_FLAG_ALL :
-                                        VSF_FLAG_NONE) | (src[0].
-                                                          RelAddr << 4);
-       o_inst->src[2] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)),       // y
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
-                                        VSF_IN_COMPONENT_ZERO, // z
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
-                                        t_src_class(src[0].File),
-                                        src[0].
-                                        NegateBase ? VSF_FLAG_ALL :
-                                        VSF_FLAG_NONE) | (src[0].
-                                                          RelAddr << 4);
+       inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)),      // X
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),        // W
+                                 PVS_SRC_SELECT_FORCE_0,       // Z
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),        // Y
+                                 t_src_class(src[0].File),     /* all three operand words read src[0]; only lane order differs */
+                                 src[0].
+                                 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[0].RelAddr << 4);      /* operand 1 lane order: x, w, 0, y */
+       inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)),      // Y
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),        // W
+                                 PVS_SRC_SELECT_FORCE_0,       // Z
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),        // X
+                                 t_src_class(src[0].File),
+                                 src[0].
+                                 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[0].RelAddr << 4);      /* operand 2 lane order: y, w, 0, x */
+       inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)),      // Y
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),        // X
+                                 PVS_SRC_SELECT_FORCE_0,       // Z
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),        // W
+                                 t_src_class(src[0].File),
+                                 src[0].
+                                 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[0].RelAddr << 4);      /* operand 3 lane order: y, x, 0, w */
+
+       return inst;    /* returned un-advanced; the loop in r300TranslateVertexShader does inst += 4 */
 }
 
-/* TODO: LOG */
+static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* LOG: fills inst[0..3] with one ME_LOG_BASE2_DX instruction. */
+       inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX,
+                                    GL_TRUE,   /* GL_TRUE here, GL_FALSE in the VE_* translators nearby; meaning is defined by PVS_OP_DST_OPERAND (not in view) */
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src_scalar(vp, &src[0]);    /* operand goes through t_src_scalar(), not t_src() */
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* no second operand: SWIZZLE_ZERO constant filler */
+       inst[3] = __CONST(0, SWIZZLE_ZERO);     /* no third operand: same filler */
+
+       return inst;    /* returned un-advanced; the loop in r300TranslateVertexShader does inst += 4 */
+}
 
-static void t_opcode_mad(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
-       unsigned long hw_op;
-
-       hw_op = (src[0].File == PROGRAM_TEMPORARY
-                && src[1].File == PROGRAM_TEMPORARY
-                && src[2].File ==
-                PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
-           R300_VPI_OUT_OP_MAD;
-
-       o_inst->opcode =
-           MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       o_inst->src[0] = t_src(vp, &src[0]);
-       o_inst->src[1] = t_src(vp, &src[1]);
-       o_inst->src[2] = t_src(vp, &src[2]);
+       inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,    /* MAD: always this opcode; the removed code chose MAD vs MAD_2 by source register file */
+                                    GL_FALSE,
+                                    GL_TRUE,   /* the only translator in view that sets this third flag; presumably marks a macro op - confirm in PVS_OP_DST_OPERAND */
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* all three operand slots carry real sources */
+       inst[2] = t_src(vp, &src[1]);
+       inst[3] = t_src(vp, &src[2]);
+
+       return inst;    /* returned un-advanced; the loop in r300TranslateVertexShader does inst += 4 */
 }
 
-/* TODO: MAX */
+static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* MAX: fills inst[0..3] with one VE_MAXIMUM instruction taking two sources. */
+       inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* first operand */
+       inst[2] = t_src(vp, &src[1]);   /* second operand */
+       inst[3] = __CONST(1, SWIZZLE_ZERO);     /* no third operand: SWIZZLE_ZERO filler, index 1 variant of __CONST */
+
+       return inst;    /* returned un-advanced; the loop in r300TranslateVertexShader does inst += 4 */
+}
 
-/* TODO: MIN */
+static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* MIN: fills inst[0..3] with one VE_MINIMUM instruction taking two sources. */
+       inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* first operand */
+       inst[2] = t_src(vp, &src[1]);   /* second operand */
+       inst[3] = __CONST(1, SWIZZLE_ZERO);     /* no third operand: SWIZZLE_ZERO filler, index 1 variant of __CONST */
+
+       return inst;    /* returned un-advanced; the loop in r300TranslateVertexShader does inst += 4 */
+}
 
-static void t_opcode_mov(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
        //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
 
-#if 1
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       o_inst->src[0] = t_src(vp, &src[0]);
-       o_inst->src[1] = ZERO_SRC_0;
-       o_inst->src[2] = ZERO_SRC_0;
-#else
-       hw_op =
-           (src[0].File ==
-            PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
-           R300_VPI_OUT_OP_MAD;
-
-       o_inst->opcode =
-           MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       o_inst->src[0] = t_src(vp, &src[0]);
-       o_inst->src[1] = ONE_SRC_0;
-       o_inst->src[2] = ZERO_SRC_0;
-#endif
+       inst[0] = PVS_OP_DST_OPERAND(VE_ADD,    /* MOV is emitted as an add: source 0 plus a zero operand */
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* the value being moved */
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* zero addend (old ZERO_SRC_0) */
+       inst[3] = __CONST(0, SWIZZLE_ZERO);     /* third slot filler (old ZERO_SRC_0) */
+
+       return inst;    /* returned un-advanced; the loop in r300TranslateVertexShader does inst += 4 */
 }
 
-static void t_opcode_mul(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
-       unsigned long hw_op;
-
-       // HW mul can take third arg but appears to have some other limitations.
-
-       hw_op = (src[0].File == PROGRAM_TEMPORARY
-                && src[1].File ==
-                PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
-           R300_VPI_OUT_OP_MAD;
-
-       o_inst->opcode =
-           MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       o_inst->src[0] = t_src(vp, &src[0]);
-       o_inst->src[1] = t_src(vp, &src[1]);
-
-       o_inst->src[2] = ZERO_SRC_1;
+       inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY,       /* MUL: plain multiply opcode; the removed code used MAD/MAD_2 with a zero addend */
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* first factor */
+       inst[2] = t_src(vp, &src[1]);   /* second factor */
+       inst[3] = __CONST(1, SWIZZLE_ZERO);     /* third slot filler (old ZERO_SRC_1) */
+
+       return inst;    /* returned un-advanced; the loop in r300TranslateVertexShader does inst += 4 */
 }
 
-static void t_opcode_pow(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_POW, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       o_inst->src[0] = t_src_scalar(vp, &src[0]);
-       o_inst->src[1] = ZERO_SRC_0;
-       o_inst->src[2] = t_src_scalar(vp, &src[1]);
+       inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,  /* POW: one instruction written to inst[0..3] */
+                                    GL_TRUE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src_scalar(vp, &src[0]);    /* program source 0, via t_src_scalar() */
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* middle slot is the zero filler, as in the removed code */
+       inst[3] = t_src_scalar(vp, &src[1]);    /* program source 1 goes in the THIRD operand slot, not the second */
+
+       return inst;    /* returned un-advanced; the loop in r300TranslateVertexShader does inst += 4 */
 }
 
-/* TODO: RCP */
-
-/* TODO: RSQ */
+static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* RCP: fills inst[0..3] with one ME_RECIP_DX instruction. */
+       inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX,
+                                    GL_TRUE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src_scalar(vp, &src[0]);    /* operand goes through t_src_scalar(), not t_src() */
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* no second operand: SWIZZLE_ZERO constant filler */
+       inst[3] = __CONST(0, SWIZZLE_ZERO);     /* no third operand: same filler */
+
+       return inst;    /* returned un-advanced; the loop in r300TranslateVertexShader does inst += 4 */
+}
 
-/* TODO: SGE */
+static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* RSQ: fills inst[0..3] with one ME_RECIP_SQRT_DX instruction. */
+       inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX,
+                                    GL_TRUE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src_scalar(vp, &src[0]);    /* operand goes through t_src_scalar(), not t_src() */
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* no second operand: SWIZZLE_ZERO constant filler */
+       inst[3] = __CONST(0, SWIZZLE_ZERO);     /* no third operand: same filler */
+
+       return inst;    /* returned un-advanced; the loop in r300TranslateVertexShader does inst += 4 */
+}
 
-/* TODO: SLT */
+static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* SGE: fills inst[0..3] with one VE_SET_GREATER_THAN_EQUAL instruction taking two sources. */
+       inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* left-hand operand */
+       inst[2] = t_src(vp, &src[1]);   /* right-hand operand */
+       inst[3] = __CONST(1, SWIZZLE_ZERO);     /* no third operand: SWIZZLE_ZERO filler, index 1 variant of __CONST */
+
+       return inst;    /* returned un-advanced; the loop in r300TranslateVertexShader does inst += 4 */
+}
 
-static void t_opcode_sub(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
+static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
-       unsigned long hw_op;
+       inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN,  /* SLT: one instruction written to inst[0..3], two sources */
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* left-hand operand */
+       inst[2] = t_src(vp, &src[1]);   /* right-hand operand */
+       inst[3] = __CONST(1, SWIZZLE_ZERO);     /* no third operand: SWIZZLE_ZERO filler, index 1 variant of __CONST */
+
+       return inst;    /* returned un-advanced; the loop in r300TranslateVertexShader does inst += 4 */
+}
 
+static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* SUB: fills inst[0..3] with one instruction computing source 0 minus source 1. */
        //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
 
-#if 1
-       hw_op = (src[0].File == PROGRAM_TEMPORARY
-                && src[1].File ==
-                PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 :
-           R300_VPI_OUT_OP_MAD;
-
-       o_inst->opcode =
-           MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-       o_inst->src[0] = t_src(vp, &src[0]);
-       o_inst->src[1] = ONE_SRC_0;
-       o_inst->src[2] =
-           MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
-                           t_src_class(src[1].File),
-                           (!src[1].
-                            NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+#if 0  /* disabled variant: VE_ADD of source 0 and negated source 1; the #else branch is what gets compiled */
+       inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
+                                 t_src_class(src[1].File),
+                                 (!src[1].
+                                  NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
            (src[1].RelAddr << 4);
+       inst[3] = 0;
 #else
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-
-       o_inst->src[0] = t_src(vp, &src[0]);
-       o_inst->src[1] =
-           MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
-                           t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
-                           t_src_class(src[1].File),
-                           (!src[1].
-                            NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+       inst[0] =       /* active variant: multiply-add, i.e. source0 * 1 + (-source1) */
+           PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+                              GL_FALSE,
+                              GL_FALSE,
+                              t_dst_index(vp, &vpi->DstReg),
+                              t_dst_mask(vpi->DstReg.WriteMask),
+                              t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* minuend */
+       inst[2] = __CONST(0, SWIZZLE_ONE);      /* multiplier of one (old ONE_SRC_0) */
+       inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),     /* subtrahend, hand-built so its sign can be flipped */
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
+                                 t_src_class(src[1].File),
+                                 (!src[1].
+                                  NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | /* inverted test: negate unless the source was already negated */
            (src[1].RelAddr << 4);
-       o_inst->src[2] = 0;
 #endif
+
+       return inst;    /* returned un-advanced; the loop in r300TranslateVertexShader does inst += 4 */
 }
 
-/* TODO: SWZ */
+static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* SWZ: emitted exactly like r300TranslateOpcodeMOV - a VE_ADD of source 0 and zero. */
+       //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
+
+       inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);   /* presumably t_src() encodes the requested swizzle - its definition is not in view */
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* zero addend */
+       inst[3] = __CONST(0, SWIZZLE_ZERO);     /* third slot filler */
+
+       return inst;    /* returned un-advanced; the loop in r300TranslateVertexShader does inst += 4 */
+}
 
-static void t_opcode_xpd(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3], int *u_temp_i)
+static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3],
+                                     int *u_temp_i)
 {
        /* mul r0, r1.yzxw, r2.zxyw
           mad r0, -r2.yzxw, r1.zxyw, r0
-          NOTE: might need MAD_2
         */
 
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, *u_temp_i,
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       VSF_OUT_CLASS_TMP);
-
-       o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)),       // y
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
-                                        t_src_class(src[0].File),
-                                        src[0].
-                                        NegateBase ? VSF_FLAG_ALL :
-                                        VSF_FLAG_NONE) | (src[0].
-                                                          RelAddr << 4);
-
-       o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)),       // z
-                                        t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
-                                        t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
-                                        t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
-                                        t_src_class(src[1].File),
-                                        src[1].
-                                        NegateBase ? VSF_FLAG_ALL :
-                                        VSF_FLAG_NONE) | (src[1].
-                                                          RelAddr << 4);
-
-       o_inst->src[2] = ZERO_SRC_1;
-       o_inst++;
-       (*u_temp_i)--;
-
-       o_inst->opcode =
-           MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-
-       o_inst->src[0] = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)),       // y
-                                        t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
-                                        t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
-                                        t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
-                                        t_src_class(src[1].File),
-                                        (!src[1].
-                                         NegateBase) ? VSF_FLAG_ALL :
-                                        VSF_FLAG_NONE) | (src[1].
-                                                          RelAddr << 4);
-
-       o_inst->src[1] = MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)),       // z
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
-                                        t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
-                                        t_src_class(src[0].File),
-                                        src[0].
-                                        NegateBase ? VSF_FLAG_ALL :
-                                        VSF_FLAG_NONE) | (src[0].
-                                                          RelAddr << 4);
-
-       o_inst->src[2] =
-           MAKE_VSF_SOURCE(*u_temp_i + 1, VSF_IN_COMPONENT_X,
-                           VSF_IN_COMPONENT_Y, VSF_IN_COMPONENT_Z,
-                           VSF_IN_COMPONENT_W, VSF_IN_CLASS_TMP,
-                           VSF_FLAG_NONE);
-
-}
+       inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,   /* instruction 1 of 2: the "mul" step, result goes to a scratch temporary */
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    *u_temp_i, /* destination index is the scratch temp, not the program's DstReg */
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    PVS_DST_REG_TEMPORARY);
+       inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)),      // Y
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),        // Z
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),        // X
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),        // W
+                                 t_src_class(src[0].File),
+                                 src[0].
+                                 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[0].RelAddr << 4);      /* source 0 in yzxw order */
+       inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)),      // Z
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),        // X
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),        // Y
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),        // W
+                                 t_src_class(src[1].File),
+                                 src[1].
+                                 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[1].RelAddr << 4);      /* source 1 in zxyw order */
+       inst[3] = __CONST(1, SWIZZLE_ZERO);     /* zero addend (old ZERO_SRC_1), so this multiply-add acts as a multiply */
+       inst += 4;      /* step to the four words of the second instruction */
+
+       inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,   /* instruction 2 of 2: the "mad" step, writes the real destination */
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)),      // Y
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),        // Z
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),        // X
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),        // W
+                                 t_src_class(src[1].File),
+                                 (!src[1].
+                                  NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | /* inverted test: source 1 (yzxw) enters with its sign flipped */
+           (src[1].RelAddr << 4);
+       inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)),      // Z
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),        // X
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),        // Y
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),        // W
+                                 t_src_class(src[0].File),
+                                 src[0].
+                                 NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[0].RelAddr << 4);      /* source 0 in zxyw order */
+       inst[3] =       /* addend: the scratch temporary written by instruction 1, read unswizzled */
+           PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
+                           PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
+                           PVS_SRC_REG_TEMPORARY, VSF_FLAG_NONE);
 
-static void t_opcode_rcc(struct r300_vertex_program *vp,
-                        struct prog_instruction *vpi,
-                        struct r300_vertprog_instruction *o_inst,
-                        struct prog_src_register src[3])
-{
-       fprintf(stderr, "Dont know how to handle op %d yet\n",
-               vpi->Opcode);
-       _mesa_exit(-1);
-}
+       (*u_temp_i)--;  /* decremented only after both uses; the removed code decremented earlier and read *u_temp_i + 1 */
 
-static void t_opcode_default(struct r300_vertex_program *vp,
-                            struct prog_instruction *vpi,
-                            struct r300_vertprog_instruction *o_inst,
-                            struct prog_src_register src[3],
-                            int num_operands, int are_srcs_scalar)
-{
-       o_inst->opcode =
-           MAKE_VSF_OP(t_opcode(vpi->Opcode),
-                       t_dst_index(vp, &vpi->DstReg),
-                       t_dst_mask(vpi->DstReg.WriteMask),
-                       t_dst_class(vpi->DstReg.File));
-
-       switch (num_operands) {
-       case 1:
-               if (are_srcs_scalar) {
-                       o_inst->src[0] = t_src_scalar(vp, &src[0]);
-               } else {
-                       o_inst->src[0] = t_src(vp, &src[0]);
-               }
-               o_inst->src[1] = ZERO_SRC_0;
-               o_inst->src[2] = ZERO_SRC_0;
-               break;
-       case 2:
-               if (are_srcs_scalar) {
-                       o_inst->src[0] = t_src_scalar(vp, &src[0]);
-                       o_inst->src[1] = t_src_scalar(vp, &src[1]);
-               } else {
-                       o_inst->src[0] = t_src(vp, &src[0]);
-                       o_inst->src[1] = t_src(vp, &src[1]);
-               }
-               o_inst->src[2] = ZERO_SRC_1;
-               break;
-       case 3:
-               if (are_srcs_scalar) {
-                       o_inst->src[0] = t_src_scalar(vp, &src[0]);
-                       o_inst->src[1] = t_src_scalar(vp, &src[1]);
-                       o_inst->src[2] = t_src_scalar(vp, &src[2]);
-               } else {
-                       o_inst->src[0] = t_src(vp, &src[0]);
-                       o_inst->src[1] = t_src(vp, &src[1]);
-                       o_inst->src[2] = t_src(vp, &src[2]);
-               }
-               break;
-       default:
-               assert(0);
-               break;
-       }
+       return inst;    /* points at the SECOND instruction, since inst was advanced by 4 above */
 }
 
 static void t_inputs_outputs(struct r300_vertex_program *vp)
@@ -1011,7 +985,7 @@ static void t_inputs_outputs(struct r300_vertex_program *vp)
        if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
                vp->outputs[VERT_RESULT_BFC0] =
                    vp->outputs[VERT_RESULT_COL0] + 2;
-               cur_reg = vp->outputs[VERT_RESULT_BFC0] + 1;
+               cur_reg = vp->outputs[VERT_RESULT_BFC0] + 2;
        }
 
        if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
@@ -1036,9 +1010,8 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp,
                                      struct prog_instruction *vpi)
 {
        int i;
-       struct r300_vertprog_instruction *o_inst;
+       GLuint *inst;
        unsigned long num_operands;
-       int are_srcs_scalar;
        /* Initial value should be last tmp reg that hw supports.
           Strangely enough r300 doesn't mind even though these would be out of range.
           Smart enough to realize that it doesn't need it? */
@@ -1053,8 +1026,8 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp,
 
        t_inputs_outputs(vp);
 
-       o_inst = vp->program.body.i;
-       for (; vpi->Opcode != OPCODE_END; vpi++, o_inst++) {
+       for (inst = vp->program.body.i; vpi->Opcode != OPCODE_END;
+            vpi++, inst += 4) {
 
                FREE_TEMPS();
 
@@ -1064,8 +1037,7 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp,
                        vpi->DstReg.Index = u_temp_i;
                }
 
-               num_operands = op_operands(vpi->Opcode) & OP_MASK;
-               are_srcs_scalar = op_operands(vpi->Opcode) & SCALAR_FLAG;
+               num_operands = _mesa_num_inst_src_regs(vpi->Opcode);
 
                /* copy the sources (src) from mesa into a local variable... is this needed? */
                for (i = 0; i < num_operands; i++) {
@@ -1075,24 +1047,25 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp,
                if (num_operands == 3) {        /* TODO: scalars */
                        if (CMP_SRCS(src[1], src[2])
                            || CMP_SRCS(src[0], src[2])) {
-                               o_inst->opcode =
-                                   MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
-                                               u_temp_i, VSF_FLAG_ALL,
-                                               VSF_OUT_CLASS_TMP);
-
-                               o_inst->src[0] =
-                                   MAKE_VSF_SOURCE(t_src_index
-                                                   (vp, &src[2]),
-                                                   SWIZZLE_X, SWIZZLE_Y,
-                                                   SWIZZLE_Z, SWIZZLE_W,
-                                                   t_src_class(src[2].
-                                                               File),
-                                                   VSF_FLAG_NONE) |
-                                   (src[2].RelAddr << 4);
-
-                               o_inst->src[1] = ZERO_SRC_2;
-                               o_inst->src[2] = ZERO_SRC_2;
-                               o_inst++;
+                               inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+                                                            GL_FALSE,
+                                                            GL_FALSE,
+                                                            u_temp_i,
+                                                            VSF_FLAG_ALL,
+                                                            PVS_DST_REG_TEMPORARY);
+                               inst[1] =
+                                   PVS_SRC_OPERAND(t_src_index(vp, &src[2]),
+                                                   SWIZZLE_X,
+                                                   SWIZZLE_Y,
+                                                   SWIZZLE_Z,
+                                                   SWIZZLE_W,
+                                                   t_src_class(src[2].File),
+                                                   VSF_FLAG_NONE) | (src[2].
+                                                                     RelAddr <<
+                                                                     4);
+                               inst[2] = __CONST(2, SWIZZLE_ZERO);
+                               inst[3] = __CONST(2, SWIZZLE_ZERO);
+                               inst += 4;
 
                                src[2].File = PROGRAM_TEMPORARY;
                                src[2].Index = u_temp_i;
@@ -1103,24 +1076,25 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp,
 
                if (num_operands >= 2) {
                        if (CMP_SRCS(src[1], src[0])) {
-                               o_inst->opcode =
-                                   MAKE_VSF_OP(R300_VPI_OUT_OP_ADD,
-                                               u_temp_i, VSF_FLAG_ALL,
-                                               VSF_OUT_CLASS_TMP);
-
-                               o_inst->src[0] =
-                                   MAKE_VSF_SOURCE(t_src_index
-                                                   (vp, &src[0]),
-                                                   SWIZZLE_X, SWIZZLE_Y,
-                                                   SWIZZLE_Z, SWIZZLE_W,
-                                                   t_src_class(src[0].
-                                                               File),
-                                                   VSF_FLAG_NONE) |
-                                   (src[0].RelAddr << 4);
-
-                               o_inst->src[1] = ZERO_SRC_0;
-                               o_inst->src[2] = ZERO_SRC_0;
-                               o_inst++;
+                               inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+                                                            GL_FALSE,
+                                                            GL_FALSE,
+                                                            u_temp_i,
+                                                            VSF_FLAG_ALL,
+                                                            PVS_DST_REG_TEMPORARY);
+                               inst[1] =
+                                   PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
+                                                   SWIZZLE_X,
+                                                   SWIZZLE_Y,
+                                                   SWIZZLE_Z,
+                                                   SWIZZLE_W,
+                                                   t_src_class(src[0].File),
+                                                   VSF_FLAG_NONE) | (src[0].
+                                                                     RelAddr <<
+                                                                     4);
+                               inst[2] = __CONST(0, SWIZZLE_ZERO);
+                               inst[3] = __CONST(0, SWIZZLE_ZERO);
+                               inst += 4;
 
                                src[0].File = PROGRAM_TEMPORARY;
                                src[0].Index = u_temp_i;
@@ -1131,80 +1105,112 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp,
 
                switch (vpi->Opcode) {
                case OPCODE_ABS:
-                       t_opcode_abs(vp, vpi, o_inst, src);
+                       inst = r300TranslateOpcodeABS(vp, vpi, inst, src);
                        break;
                case OPCODE_ADD:
-                       t_opcode_add(vp, vpi, o_inst, src);
+                       inst = r300TranslateOpcodeADD(vp, vpi, inst, src);
+                       break;
+               case OPCODE_ARL:
+                       inst = r300TranslateOpcodeARL(vp, vpi, inst, src);
                        break;
-                       /* TODO: ARL */
                case OPCODE_DP3:
-                       t_opcode_dp3(vp, vpi, o_inst, src);
+                       inst = r300TranslateOpcodeDP3(vp, vpi, inst, src);
+                       break;
+               case OPCODE_DP4:
+                       inst = r300TranslateOpcodeDP4(vp, vpi, inst, src);
                        break;
-                       /* TODO: DP4 */
                case OPCODE_DPH:
-                       t_opcode_dph(vp, vpi, o_inst, src);
+                       inst = r300TranslateOpcodeDPH(vp, vpi, inst, src);
+                       break;
+               case OPCODE_DST:
+                       inst = r300TranslateOpcodeDST(vp, vpi, inst, src);
+                       break;
+               case OPCODE_EX2:
+                       inst = r300TranslateOpcodeEX2(vp, vpi, inst, src);
+                       break;
+               case OPCODE_EXP:
+                       inst = r300TranslateOpcodeEXP(vp, vpi, inst, src);
                        break;
-                       /* TODO: DST */
-                       /* TODO: EX2 */
-                       /* TODO: EXP */
                case OPCODE_FLR:
-                       /* FIXME */
-                       t_opcode_flr(vp, vpi, o_inst, src, &u_temp_i);
+                       inst = r300TranslateOpcodeFLR(vp, vpi, inst, src,       /* FIXME */
+                                                     &u_temp_i);
+                       break;
+               case OPCODE_FRC:
+                       inst = r300TranslateOpcodeFRC(vp, vpi, inst, src);
                        break;
-                       /* TODO: FRC */
                case OPCODE_LG2:
-                       t_opcode_lg2(vp, vpi, o_inst, src);
+                       inst = r300TranslateOpcodeLG2(vp, vpi, inst, src);
                        break;
                case OPCODE_LIT:
-                       t_opcode_lit(vp, vpi, o_inst, src);
+                       inst = r300TranslateOpcodeLIT(vp, vpi, inst, src);
+                       break;
+               case OPCODE_LOG:
+                       inst = r300TranslateOpcodeLOG(vp, vpi, inst, src);
                        break;
-                       /* TODO: LOG */
                case OPCODE_MAD:
-                       t_opcode_mad(vp, vpi, o_inst, src);
+                       inst = r300TranslateOpcodeMAD(vp, vpi, inst, src);
+                       break;
+               case OPCODE_MAX:
+                       inst = r300TranslateOpcodeMAX(vp, vpi, inst, src);
+                       break;
+               case OPCODE_MIN:
+                       inst = r300TranslateOpcodeMIN(vp, vpi, inst, src);
                        break;
-                       /* TODO: MAX */
-                       /* TODO: MIN */
                case OPCODE_MOV:
-                       t_opcode_mov(vp, vpi, o_inst, src);
+                       inst = r300TranslateOpcodeMOV(vp, vpi, inst, src);
                        break;
                case OPCODE_MUL:
-                       t_opcode_mul(vp, vpi, o_inst, src);
+                       inst = r300TranslateOpcodeMUL(vp, vpi, inst, src);
                        break;
                case OPCODE_POW:
-                       t_opcode_pow(vp, vpi, o_inst, src);
+                       inst = r300TranslateOpcodePOW(vp, vpi, inst, src);
+                       break;
+               case OPCODE_RCP:
+                       inst = r300TranslateOpcodeRCP(vp, vpi, inst, src);
+                       break;
+               case OPCODE_RSQ:
+                       inst = r300TranslateOpcodeRSQ(vp, vpi, inst, src);
+                       break;
+               case OPCODE_SGE:
+                       inst = r300TranslateOpcodeSGE(vp, vpi, inst, src);
+                       break;
+               case OPCODE_SLT:
+                       inst = r300TranslateOpcodeSLT(vp, vpi, inst, src);
                        break;
-                       /* TODO: RCP */
-                       /* TODO: RSQ */
-                       /* TODO: SGE */
-                       /* TODO: SLT */
                case OPCODE_SUB:
-                       t_opcode_sub(vp, vpi, o_inst, src);
+                       inst = r300TranslateOpcodeSUB(vp, vpi, inst, src);
                        break;
                case OPCODE_SWZ:
-                       t_opcode_mov(vp, vpi, o_inst, src);
+                       inst = r300TranslateOpcodeSWZ(vp, vpi, inst, src);
                        break;
-                       /* TODO: SWZ */
                case OPCODE_XPD:
-                       /* FIXME */
-                       t_opcode_xpd(vp, vpi, o_inst, src, &u_temp_i);
-                       break;
-
-               case OPCODE_RCC:
-                       t_opcode_rcc(vp, vpi, o_inst, src);
+                       inst = r300TranslateOpcodeXPD(vp, vpi, inst, src,       /* FIXME */
+                                                     &u_temp_i);
                        break;
-
-               case OPCODE_END:
-                       /* empty */
-                       break;
-
                default:
-                       t_opcode_default(vp, vpi, o_inst, src,
-                                        num_operands, are_srcs_scalar);
+                       assert(0);
                        break;
                }
        }
 
-       vp->program.length = (o_inst - vp->program.body.i) * 4;
+       /* Some outputs may be artificially added, to match the inputs
+          of the fragment program. Blank the outputs here. */
+       for (i = 0; i < VERT_RESULT_MAX; i++) {
+               if (vp->key.OutputsAdded & (1 << i)) {
+                       inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+                                                    GL_FALSE,
+                                                    GL_FALSE,
+                                                    vp->outputs[i],
+                                                    VSF_FLAG_ALL,
+                                                    PVS_DST_REG_OUT);
+                       inst[1] = __CONST(0, SWIZZLE_ZERO);
+                       inst[2] = __CONST(0, SWIZZLE_ZERO);
+                       inst[3] = __CONST(0, SWIZZLE_ZERO);
+                       inst += 4;
+               }
+       }
+
+       vp->program.length = (inst - vp->program.body.i);
        if (vp->program.length >= VSF_MAX_FRAGMENT_LENGTH) {
                vp->program.length = 0;
                vp->native = GL_FALSE;
@@ -1216,14 +1222,16 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp,
 #endif
 }
 
+/* DP4 version seems to trigger some hw peculiarity */
+//#define PREFER_DP4
+
 static void position_invariant(struct gl_program *prog)
 {
        struct prog_instruction *vpi;
        struct gl_program_parameter_list *paramList;
        int i;
 
-       gl_state_index tokens[STATE_LENGTH] =
-           { STATE_MVP_MATRIX, 0, 0, 0, 0 };
+       gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 };
 
        /* tokens[4] = matrix modifier */
 #ifdef PREFER_DP4
@@ -1303,8 +1311,8 @@ static void position_invariant(struct gl_program *prog)
        assert(vpi->Opcode == OPCODE_END);
 }
 
-static void insert_wpos(struct r300_vertex_program *vp,
-                       struct gl_program *prog, GLuint temp_index)
+static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog,
+                       GLuint temp_index)
 {
        struct prog_instruction *vpi;
        struct prog_instruction *vpi_insert;
@@ -1317,8 +1325,8 @@ static void insert_wpos(struct r300_vertex_program *vp,
                                prog->NumInstructions - 1);
        /* END */
        _mesa_copy_instructions(&vpi[prog->NumInstructions + 1],
-                               &prog->Instructions[prog->NumInstructions -
-                                                   1], 1);
+                               &prog->Instructions[prog->NumInstructions - 1],
+                               1);
        vpi_insert = &vpi[prog->NumInstructions - 1];
 
        vpi_insert[i].Opcode = OPCODE_MOV;
@@ -1398,6 +1406,15 @@ static struct r300_vertex_program *build_program(struct r300_vertex_program_key
        return vp;
 }
 
+static void add_outputs(struct r300_vertex_program_key *key, GLint vert)       /* set bit `vert` in the key's output masks unless it is already written */
+{
+       if (key->OutputsWritten & (1 << vert))  /* bit already set in OutputsWritten: leave both masks untouched */
+               return;
+
+       key->OutputsWritten |= 1 << vert;
+       key->OutputsAdded |= 1 << vert; /* also recorded in OutputsAdded; r300TranslateVertexShader tests vp->key.OutputsAdded to blank such outputs */
+}
+
 void r300SelectVertexShader(r300ContextPtr r300)
 {
        GLcontext *ctx = ctx = r300->radeon.glCtx;
@@ -1408,8 +1425,9 @@ void r300SelectVertexShader(r300ContextPtr r300)
        struct r300_vertex_program *vp;
        GLint wpos_idx;
 
-       vpc =
-           (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
+       vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
+       wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
+       wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
        InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
 
        wpos_idx = -1;
@@ -1423,15 +1441,29 @@ void r300SelectVertexShader(r300ContextPtr r300)
                        _mesa_exit(-1);
                }
 
-               InputsRead |= (FRAG_BIT_TEX0 << i);
+               wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
                wpos_idx = i;
        }
-       wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
-       wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
+
+       add_outputs(&wanted_key, VERT_RESULT_HPOS);
+
+       if (InputsRead & FRAG_BIT_COL0) {
+               add_outputs(&wanted_key, VERT_RESULT_COL0);
+       }
+
+       if (InputsRead & FRAG_BIT_COL1) {
+               add_outputs(&wanted_key, VERT_RESULT_COL1);
+       }
+
+       for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+               if (InputsRead & (FRAG_BIT_TEX0 << i)) {
+                       add_outputs(&wanted_key, VERT_RESULT_TEX0 + i);
+               }
+       }
+
        if (vpc->mesa_program.IsPositionInvariant) {
                /* we want position, don't we? */
                wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
-               wanted_key.OutputsWritten |= (1 << VERT_RESULT_HPOS);
        }
 
        for (vp = vpc->progs; vp; vp = vp->next)