Merge branch 'mesa_7_5_branch'
[mesa.git] / src / mesa / drivers / dri / r300 / r300_vertprog.c
index ddb4996ee028e1c357524832d0a9132ac2c68118..c41a8fdd621a373f3288febfea349268982ad8de 100644 (file)
@@ -1,6 +1,7 @@
 /**************************************************************************
 
-Copyright (C) 2005 Aapo Tahkola.
+Copyright (C) 2005  Aapo Tahkola <aet@rasterburn.org>
+Copyright (C) 2008  Oliver McFadden <z3ro.geek@gmail.com>
 
 All Rights Reserved.
 
@@ -25,93 +26,67 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 **************************************************************************/
 
-/*
- * Authors:
- *   Aapo Tahkola <aet@rasterburn.org>
- */
-#include "glheader.h"
-#include "macros.h"
-#include "enums.h"
-#include "program.h"
+/* Radeon R5xx Acceleration, Revision 1.2 */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
 #include "shader/prog_instruction.h"
 #include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
 #include "shader/prog_statevars.h"
 #include "tnl/tnl.h"
 
 #include "r300_context.h"
-#include "r300_program.h"
-
-#if SWIZZLE_X != VSF_IN_COMPONENT_X || \
-    SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
-    SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
-    SWIZZLE_W != VSF_IN_COMPONENT_W || \
-    SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
-    SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
-    WRITEMASK_X != VSF_FLAG_X || \
-    WRITEMASK_Y != VSF_FLAG_Y || \
-    WRITEMASK_Z != VSF_FLAG_Z || \
-    WRITEMASK_W != VSF_FLAG_W
-#error Cannot change these!
-#endif
+#include "r300_state.h"
+
+/* Non-zero when a and b differ in RelAddr, or differ in Index while both are constants or both are inputs. TODO: Get rid of t_src_class call */
+#define CMP_SRCS(a, b) (((a).RelAddr != (b).RelAddr) || ((a).Index != (b).Index && \
+                      ((t_src_class((a).File) == PVS_SRC_REG_CONSTANT && \
+                        t_src_class((b).File) == PVS_SRC_REG_CONSTANT) || \
+                       (t_src_class((a).File) == PVS_SRC_REG_INPUT && \
+                        t_src_class((b).File) == PVS_SRC_REG_INPUT))))
+
+/*
+ * Build a constant operand from the already-valid src[x]: all four swizzle
+ * slots are y (e.g. SWIZZLE_ZERO), no negation; needs vp and src in scope.
+ */
+#define __CONST(x, y)  \
+       (PVS_SRC_OPERAND(t_src_index(vp, &src[x]),      \
+                          t_swizzle(y),        \
+                          t_swizzle(y),        \
+                          t_swizzle(y),        \
+                          t_swizzle(y),        \
+                          t_src_class(src[x].File), \
+                          VSF_FLAG_NONE) | (src[x].RelAddr << 4))
 
-#define SCALAR_FLAG (1<<31)
-#define FLAG_MASK (1<<31)
-#define OP_MASK        (0xf)  /* we are unlikely to have more than 15 */
-#define OPN(operator, ip) {#operator, OPCODE_##operator, ip}
-
-static struct{
-       char *name;
-       int opcode;
-       unsigned long ip; /* number of input operands and flags */
-}op_names[]={
-       OPN(ABS, 1),
-       OPN(ADD, 2),
-       OPN(ARL, 1|SCALAR_FLAG),
-       OPN(DP3, 2),
-       OPN(DP4, 2),
-       OPN(DPH, 2),
-       OPN(DST, 2),
-       OPN(EX2, 1|SCALAR_FLAG),
-       OPN(EXP, 1|SCALAR_FLAG),
-       OPN(FLR, 1),
-       OPN(FRC, 1),
-       OPN(LG2, 1|SCALAR_FLAG),
-       OPN(LIT, 1),
-       OPN(LOG, 1|SCALAR_FLAG),
-       OPN(MAD, 3),
-       OPN(MAX, 2),
-       OPN(MIN, 2),
-       OPN(MOV, 1),
-       OPN(MUL, 2),
-       OPN(POW, 2|SCALAR_FLAG),
-       OPN(RCP, 1|SCALAR_FLAG),
-       OPN(RSQ, 1|SCALAR_FLAG),
-       OPN(SGE, 2),
-       OPN(SLT, 2),
-       OPN(SUB, 2),
-       OPN(SWZ, 1),
-       OPN(XPD, 2),
-       OPN(RCC, 0), //extra
-       OPN(PRINT, 0),
-       OPN(END, 0),
-};
-#undef OPN
-
-int r300VertexProgUpdateParams(GLcontext *ctx, struct r300_vertex_program_cont *vp, float *dst)
+#define FREE_TEMPS() \
+       do { \
+               int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \
+               if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \
+                       WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \
+                       vp->error = GL_TRUE; \
+               } \
+               u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
+       } while (0)
+
+int r300VertexProgUpdateParams(GLcontext * ctx,
+                              struct r300_vertex_program_cont *vp, float *dst)
 {
        int pi;
        struct gl_vertex_program *mesa_vp = &vp->mesa_program;
-       float *dst_o=dst;
-        struct gl_program_parameter_list *paramList;
+       float *dst_o = dst;
+       struct gl_program_parameter_list *paramList;
 
        if (mesa_vp->IsNVProgram) {
                _mesa_load_tracked_matrices(ctx);
 
-               for (pi=0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
-                       *dst++=ctx->VertexProgram.Parameters[pi][0];
-                       *dst++=ctx->VertexProgram.Parameters[pi][1];
-                       *dst++=ctx->VertexProgram.Parameters[pi][2];
-                       *dst++=ctx->VertexProgram.Parameters[pi][3];
+               for (pi = 0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
+                       *dst++ = ctx->VertexProgram.Parameters[pi][0];
+                       *dst++ = ctx->VertexProgram.Parameters[pi][1];
+                       *dst++ = ctx->VertexProgram.Parameters[pi][2];
+                       *dst++ = ctx->VertexProgram.Parameters[pi][3];
                }
                return dst - dst_o;
        }
@@ -119,26 +94,27 @@ int r300VertexProgUpdateParams(GLcontext *ctx, struct r300_vertex_program_cont *
        assert(mesa_vp->Base.Parameters);
        _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
 
-       if(mesa_vp->Base.Parameters->NumParameters * 4 > VSF_MAX_FRAGMENT_LENGTH){
+       if (mesa_vp->Base.Parameters->NumParameters * 4 >
+           VSF_MAX_FRAGMENT_LENGTH) {
                fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
                _mesa_exit(-1);
        }
 
-        paramList = mesa_vp->Base.Parameters;
-       for(pi=0; pi < paramList->NumParameters; pi++){
-               switch(paramList->Parameters[pi].Type){
-
+       paramList = mesa_vp->Base.Parameters;
+       for (pi = 0; pi < paramList->NumParameters; pi++) {
+               switch (paramList->Parameters[pi].Type) {
                case PROGRAM_STATE_VAR:
                case PROGRAM_NAMED_PARAM:
                        //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
                case PROGRAM_CONSTANT:
-                       *dst++=paramList->ParameterValues[pi][0];
-                       *dst++=paramList->ParameterValues[pi][1];
-                       *dst++=paramList->ParameterValues[pi][2];
-                       *dst++=paramList->ParameterValues[pi][3];
-               break;
-
-               default: _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__);
+                       *dst++ = paramList->ParameterValues[pi][0];
+                       *dst++ = paramList->ParameterValues[pi][1];
+                       *dst++ = paramList->ParameterValues[pi][2];
+                       *dst++ = paramList->ParameterValues[pi][3];
+                       break;
+               default:
+                       _mesa_problem(NULL, "Bad param type in %s",
+                                     __FUNCTION__);
                }
 
        }
@@ -152,66 +128,67 @@ static unsigned long t_dst_mask(GLuint mask)
        return mask & VSF_FLAG_ALL;
 }
 
-static unsigned long t_dst_class(enum register_file file)
+static unsigned long t_dst_class(gl_register_file file)
 {
 
-       switch(file){
-               case PROGRAM_TEMPORARY:
-                       return VSF_OUT_CLASS_TMP;
-               case PROGRAM_OUTPUT:
-                       return VSF_OUT_CLASS_RESULT;
-               case PROGRAM_ADDRESS:
-                       return VSF_OUT_CLASS_ADDR;
+       switch (file) {
+       case PROGRAM_TEMPORARY:
+               return PVS_DST_REG_TEMPORARY;
+       case PROGRAM_OUTPUT:
+               return PVS_DST_REG_OUT;
+       case PROGRAM_ADDRESS:
+               return PVS_DST_REG_A0;
                /*
-               case PROGRAM_INPUT:
-               case PROGRAM_LOCAL_PARAM:
-               case PROGRAM_ENV_PARAM:
-               case PROGRAM_NAMED_PARAM:
-               case PROGRAM_STATE_VAR:
-               case PROGRAM_WRITE_ONLY:
-               case PROGRAM_ADDRESS:
-               */
-               default:
-                       fprintf(stderr, "problem in %s", __FUNCTION__);
-                       _mesa_exit(-1);
+                  case PROGRAM_INPUT:
+                  case PROGRAM_LOCAL_PARAM:
+                  case PROGRAM_ENV_PARAM:
+                  case PROGRAM_NAMED_PARAM:
+                  case PROGRAM_STATE_VAR:
+                  case PROGRAM_WRITE_ONLY:
+                  case PROGRAM_ADDRESS:
+                */
+       default:
+               fprintf(stderr, "problem in %s", __FUNCTION__);
+               _mesa_exit(-1);
+               return -1;
        }
 }
 
-static unsigned long t_dst_index(struct r300_vertex_program *vp, struct prog_dst_register *dst)
+static unsigned long t_dst_index(struct r300_vertex_program *vp,
+                                struct prog_dst_register *dst)
 {
-       if(dst->File == PROGRAM_OUTPUT)
+       if (dst->File == PROGRAM_OUTPUT)
                return vp->outputs[dst->Index];
 
        return dst->Index;
 }
 
-static unsigned long t_src_class(enum register_file file)
+static unsigned long t_src_class(gl_register_file file)
 {
-
-       switch(file){
-               case PROGRAM_TEMPORARY:
-                       return VSF_IN_CLASS_TMP;
-
-               case PROGRAM_INPUT:
-                       return VSF_IN_CLASS_ATTR;
-
-               case PROGRAM_LOCAL_PARAM:
-               case PROGRAM_ENV_PARAM:
-               case PROGRAM_NAMED_PARAM:
-               case PROGRAM_STATE_VAR:
-                       return VSF_IN_CLASS_PARAM;
+       switch (file) {
+       case PROGRAM_TEMPORARY:
+               return PVS_SRC_REG_TEMPORARY;
+       case PROGRAM_INPUT:
+               return PVS_SRC_REG_INPUT;
+       case PROGRAM_LOCAL_PARAM:
+       case PROGRAM_ENV_PARAM:
+       case PROGRAM_NAMED_PARAM:
+       case PROGRAM_CONSTANT:
+       case PROGRAM_STATE_VAR:
+               return PVS_SRC_REG_CONSTANT;
                /*
-               case PROGRAM_OUTPUT:
-               case PROGRAM_WRITE_ONLY:
-               case PROGRAM_ADDRESS:
-               */
-               default:
-                       fprintf(stderr, "problem in %s", __FUNCTION__);
-                       _mesa_exit(-1);
+                  case PROGRAM_OUTPUT:
+                  case PROGRAM_WRITE_ONLY:
+                  case PROGRAM_ADDRESS:
+                */
+       default:
+               fprintf(stderr, "problem in %s", __FUNCTION__);
+               _mesa_exit(-1);
+               return -1;
        }
 }
 
-static __inline unsigned long t_swizzle(GLubyte swizzle)
+static INLINE unsigned long t_swizzle(GLubyte swizzle)
 {
 /* this is in fact a NOP as the Mesa SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
        return swizzle;
@@ -222,659 +199,1001 @@ static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
 {
        int i;
 
-       if(vp == NULL){
-               fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
-               return ;
+       if (vp == NULL) {
+               fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__,
+                       caller);
+               return;
        }
 
        fprintf(stderr, "%s:<", caller);
-       for(i=0; i < VERT_ATTRIB_MAX; i++)
+       for (i = 0; i < VERT_ATTRIB_MAX; i++)
                fprintf(stderr, "%d ", vp->inputs[i]);
        fprintf(stderr, ">\n");
 
 }
 #endif
 
-static unsigned long t_src_index(struct r300_vertex_program *vp, struct prog_src_register *src)
+static unsigned long t_src_index(struct r300_vertex_program *vp,
+                                struct prog_src_register *src)
 {
-       int i;
-       int max_reg=-1;
-
-       if(src->File == PROGRAM_INPUT){
-               if(vp->inputs[src->Index] != -1)
-                       return vp->inputs[src->Index];
-
-               for(i=0; i < VERT_ATTRIB_MAX; i++)
-                       if(vp->inputs[i] > max_reg)
-                               max_reg=vp->inputs[i];
-
-               vp->inputs[src->Index]=max_reg+1;
-
-               //vp_dump_inputs(vp, __FUNCTION__);
-
+       if (src->File == PROGRAM_INPUT) {
+               assert(vp->inputs[src->Index] != -1);
                return vp->inputs[src->Index];
-       }else{
+       } else {
                if (src->Index < 0) {
-                       fprintf (stderr, "negative offsets for indirect addressing do not work.\n");
+                       fprintf(stderr,
+                               "negative offsets for indirect addressing do not work.\n");
                        return 0;
                }
                return src->Index;
        }
 }
 
-static unsigned long t_src(struct r300_vertex_program *vp, struct prog_src_register *src)
+/* these two functions should probably be merged... */
+
+static unsigned long t_src(struct r300_vertex_program *vp,
+                          struct prog_src_register *src)
 {
-       /* src->NegateBase uses the NEGATE_ flags from program_instruction.h,
+       /* src->Negate uses the NEGATE_ flags from prog_instruction.h,
         * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
         */
-       return MAKE_VSF_SOURCE(t_src_index(vp, src),
-                               t_swizzle(GET_SWZ(src->Swizzle, 0)),
-                               t_swizzle(GET_SWZ(src->Swizzle, 1)),
-                               t_swizzle(GET_SWZ(src->Swizzle, 2)),
-                               t_swizzle(GET_SWZ(src->Swizzle, 3)),
-                               t_src_class(src->File),
-                               src->NegateBase) | (src->RelAddr << 4);
+       return PVS_SRC_OPERAND(t_src_index(vp, src),
+                              t_swizzle(GET_SWZ(src->Swizzle, 0)),
+                              t_swizzle(GET_SWZ(src->Swizzle, 1)),
+                              t_swizzle(GET_SWZ(src->Swizzle, 2)),
+                              t_swizzle(GET_SWZ(src->Swizzle, 3)),
+                              t_src_class(src->File),
+                              src->Negate) | (src->RelAddr << 4);
 }
 
-static unsigned long t_src_scalar(struct r300_vertex_program *vp, struct prog_src_register *src)
+static unsigned long t_src_scalar(struct r300_vertex_program *vp,
+                                 struct prog_src_register *src)
 {
+       /* Scalar operand: swizzle component 0 is replicated into all four
+        * slots, and any non-zero src->Negate negates the whole operand.
+        */
+       return PVS_SRC_OPERAND(t_src_index(vp, src),
+                              t_swizzle(GET_SWZ(src->Swizzle, 0)),
+                              t_swizzle(GET_SWZ(src->Swizzle, 0)),
+                              t_swizzle(GET_SWZ(src->Swizzle, 0)),
+                              t_swizzle(GET_SWZ(src->Swizzle, 0)),
+                              t_src_class(src->File),
+                              src->Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src->RelAddr << 4);
+}
 
-       return MAKE_VSF_SOURCE(t_src_index(vp, src),
-                               t_swizzle(GET_SWZ(src->Swizzle, 0)),
-                               t_swizzle(GET_SWZ(src->Swizzle, 0)),
-                               t_swizzle(GET_SWZ(src->Swizzle, 0)),
-                               t_swizzle(GET_SWZ(src->Swizzle, 0)),
-                               t_src_class(src->File),
-                               src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
+static GLboolean valid_dst(struct r300_vertex_program *vp,
+                          struct prog_dst_register *dst)
+{
+       if (dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
+               return GL_FALSE;
+       } else if (dst->File == PROGRAM_ADDRESS) {
+               assert(dst->Index == 0);
+       }
+
+       return GL_TRUE;
 }
 
-static unsigned long t_opcode(enum prog_opcode opcode)
+static GLuint *r300TranslateOpcodeABS(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
+       //MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
+
+       inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
+                                 t_src_class(src[0].File),
+                                 (!src[0].
+                                  Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[0].RelAddr << 4);
+       inst[3] = 0;
+
+       return inst;
+}
 
-       switch(opcode){
-               case OPCODE_ARL: return R300_VPI_OUT_OP_ARL;
-               case OPCODE_DST: return R300_VPI_OUT_OP_DST;
-               case OPCODE_EX2: return R300_VPI_OUT_OP_EX2;
-               case OPCODE_EXP: return R300_VPI_OUT_OP_EXP;
-               case OPCODE_FRC: return R300_VPI_OUT_OP_FRC;
-               case OPCODE_LG2: return R300_VPI_OUT_OP_LG2;
-               case OPCODE_LOG: return R300_VPI_OUT_OP_LOG;
-               case OPCODE_MAX: return R300_VPI_OUT_OP_MAX;
-               case OPCODE_MIN: return R300_VPI_OUT_OP_MIN;
-               case OPCODE_MUL: return R300_VPI_OUT_OP_MUL;
-               case OPCODE_RCP: return R300_VPI_OUT_OP_RCP;
-               case OPCODE_RSQ: return R300_VPI_OUT_OP_RSQ;
-               case OPCODE_SGE: return R300_VPI_OUT_OP_SGE;
-               case OPCODE_SLT: return R300_VPI_OUT_OP_SLT;
-               case OPCODE_DP4: return R300_VPI_OUT_OP_DOT;
+static GLuint *r300TranslateOpcodeADD(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{
+       inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = t_src(vp, &src[1]);
+       inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+       return inst;
+}
 
-               default:
-                       fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
-       }
-       _mesa_exit(-1);
-       return 0;
+static GLuint *r300TranslateOpcodeARL(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{
+       inst[0] = PVS_OP_DST_OPERAND(VE_FLT2FIX_DX,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = __CONST(0, SWIZZLE_ZERO);
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+       return inst;
 }
 
-static unsigned long op_operands(enum prog_opcode opcode)
+static GLuint *r300TranslateOpcodeDP3(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
-       int i;
+       //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
+
+       inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
+                                 SWIZZLE_ZERO,
+                                 t_src_class(src[0].File),
+                                 src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
+           (src[0].RelAddr << 4);
+       inst[2] =
+           PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
+                           t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
+                           t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
+                           t_swizzle(GET_SWZ(src[1].Swizzle, 2)), SWIZZLE_ZERO,
+                           t_src_class(src[1].File),
+                           src[1].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
+           (src[1].RelAddr << 4);
+       inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+       return inst;
+}
 
-       /* Can we trust mesas opcodes to be in order ? */
-       for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
-               if(op_names[i].opcode == opcode)
-                       return op_names[i].ip;
+static GLuint *r300TranslateOpcodeDP4(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{
+       inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = t_src(vp, &src[1]);
+       inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+       return inst;
+}
 
-       fprintf(stderr, "op %d not found in op_names\n", opcode);
-       _mesa_exit(-1);
-       return 0;
+static GLuint *r300TranslateOpcodeDPH(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{
+       //DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
+       inst[0] = PVS_OP_DST_OPERAND(VE_DOT_PRODUCT,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
+                                 PVS_SRC_SELECT_FORCE_1,
+                                 t_src_class(src[0].File),
+                                 src[0].Negate ? VSF_FLAG_XYZ : VSF_FLAG_NONE) |
+           (src[0].RelAddr << 4);
+       inst[2] = t_src(vp, &src[1]);
+       inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+       return inst;
 }
 
-static GLboolean valid_dst(struct r300_vertex_program *vp, struct prog_dst_register *dst)
+static GLuint *r300TranslateOpcodeDST(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
-       if(dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1) {
-               return GL_FALSE;
-       } else if(dst->File == PROGRAM_ADDRESS) {
-               assert(dst->Index == 0);
-       }
+       inst[0] = PVS_OP_DST_OPERAND(VE_DISTANCE_VECTOR,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = t_src(vp, &src[1]);
+       inst[3] = __CONST(1, SWIZZLE_ZERO);
+
+       return inst;
+}
 
-       return GL_TRUE;
+static GLuint *r300TranslateOpcodeEX2(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{
+       inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_FULL_DX,
+                                    GL_TRUE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src_scalar(vp, &src[0]);
+       inst[2] = __CONST(0, SWIZZLE_ZERO);
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+       return inst;
 }
 
-/* TODO: Get rid of t_src_class call */
-#define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \
-                      ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \
-                        t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \
-                       (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \
-                        t_src_class(b.File) == VSF_IN_CLASS_ATTR)))) \
-
-#define ZERO_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4))
-
-#define ZERO_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4))
-
-#define ZERO_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
-                                   t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4))
-
-#define ONE_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4))
-
-#define ONE_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4))
-
-#define ONE_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   SWIZZLE_ONE, SWIZZLE_ONE, \
-                                   t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4))
+static GLuint *r300TranslateOpcodeEXP(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{
+       inst[0] = PVS_OP_DST_OPERAND(ME_EXP_BASE2_DX,
+                                    GL_TRUE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src_scalar(vp, &src[0]);
+       inst[2] = __CONST(0, SWIZZLE_ZERO);
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+       return inst;
+}
 
-/* DP4 version seems to trigger some hw peculiarity */
-//#define PREFER_DP4
+static GLuint *r300TranslateOpcodeFLR(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3],
+                                     int *u_temp_i)
+{
+       /* Two instructions (returns the second one), using temp *u_temp_i:
+          FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
+          ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
+       inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    *u_temp_i,
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    PVS_DST_REG_TEMPORARY);
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = __CONST(0, SWIZZLE_ZERO);
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+       inst += 4;
+
+       inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = PVS_SRC_OPERAND(*u_temp_i,
+                                 PVS_SRC_SELECT_X,
+                                 PVS_SRC_SELECT_Y,
+                                 PVS_SRC_SELECT_Z,
+                                 PVS_SRC_SELECT_W, PVS_SRC_REG_TEMPORARY,
+                                 /* floor(v) = v - frac(v); both instructions
+                                  * read src[0] with its Negate applied, so
+                                  * the temp is always subtracted (as listed) */
+                                 VSF_FLAG_ALL);
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+       (*u_temp_i)--;
+
+       return inst;
+}
 
-#define FREE_TEMPS() \
-       do { \
-               if(u_temp_i < vp->num_temporaries) { \
-                       WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \
-                       vp->native = GL_FALSE; \
-               } \
-               u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
-       } while (0)
+static GLuint *r300TranslateOpcodeFRC(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* Translate FRC: fill the four words inst[0..3] with a single
+        * VE_FRACTION instruction.  The destination index, write mask and
+        * register class are all derived from vpi->DstReg; only src[0] is
+        * read.  Returns inst unchanged.
+        */
+       inst[0] = PVS_OP_DST_OPERAND(VE_FRACTION,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* __CONST is defined outside this chunk; presumably a zero filler operand -- confirm */
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+       return inst;
+}
 
-static void r300_translate_vertex_shader(struct r300_vertex_program *vp, struct prog_instruction *vpi)
+static GLuint *r300TranslateOpcodeLG2(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
 {
-       int i, cur_reg=0;
-       VERTEX_SHADER_INSTRUCTION *o_inst;
-       unsigned long operands;
-       int are_srcs_scalar;
-       unsigned long hw_op;
-       /* Initial value should be last tmp reg that hw supports.
-          Strangely enough r300 doesnt mind even though these would be out of range.
-          Smart enough to realize that it doesnt need it? */
-       int u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1;
-       struct prog_src_register src[3];
+       /* LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
+        *
+        * Translate LG2: fill inst[0..3] with one ME_LOG_BASE2_FULL_DX
+        * instruction whose destination comes from vpi->DstReg.  All four
+        * swizzle selects of the source word use component 0 of
+        * src[0].Swizzle, so that one selected component is replicated
+        * across the operand.  Returns inst unchanged.
+        */
+
+       inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_FULL_DX,
+                                    GL_TRUE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                 t_src_class(src[0].File),
+                                 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[0].RelAddr << 4);      /* relative-addressing flag is OR-ed in at bit 4 */
+       inst[2] = __CONST(0, SWIZZLE_ZERO);
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+       return inst;
+}
 
-       vp->pos_end=0; /* Not supported yet */
-       vp->program.length=0;
-       /*vp->num_temporaries=mesa_vp->Base.NumTemporaries;*/
+static GLuint *r300TranslateOpcodeLIT(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* Translate LIT: fill inst[0..3] with one ME_LIGHT_COEFF_DX
+        * instruction.  All three source words are built from src[0]; they
+        * differ only in which components of src[0].Swizzle are routed to
+        * each slot (see the per-line X/Y/Z/W notes).  The third slot of
+        * every source word is PVS_SRC_SELECT_FORCE_0 rather than a
+        * component of the program's swizzle.  Returns inst unchanged.
+        */
+       //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
+
+       inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
+                                    GL_TRUE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       /* NOTE: Users swizzling might not work. */
+       inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 0)),      // X
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),        // W
+                                 PVS_SRC_SELECT_FORCE_0,       // Z
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),        // Y
+                                 t_src_class(src[0].File),
+                                 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[0].RelAddr << 4);
+       inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)),      // Y
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),        // W
+                                 PVS_SRC_SELECT_FORCE_0,       // Z
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),        // X
+                                 t_src_class(src[0].File),
+                                 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[0].RelAddr << 4);
+       inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)),      // Y
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),        // X
+                                 PVS_SRC_SELECT_FORCE_0,       // Z
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),        // W
+                                 t_src_class(src[0].File),
+                                 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[0].RelAddr << 4);
+
+       return inst;
+}
 
-       for(i=0; i < VERT_ATTRIB_MAX; i++)
-               vp->inputs[i] = -1;
+static GLuint *r300TranslateOpcodeLOG(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* Translate LOG: fill inst[0..3] with one ME_LOG_BASE2_DX
+        * instruction.  The destination comes from vpi->DstReg and the
+        * single operand is src[0] passed through t_src_scalar().
+        * Returns inst unchanged.
+        */
+       inst[0] = PVS_OP_DST_OPERAND(ME_LOG_BASE2_DX,
+                                    GL_TRUE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src_scalar(vp, &src[0]);
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* __CONST is defined outside this chunk; presumably a zero filler operand -- confirm */
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+       return inst;
+}
 
-       for(i=0; i < VERT_RESULT_MAX; i++)
+static GLuint *r300TranslateOpcodeMAD(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* Translate MAD: fill inst[0..3] with one PVS_MACRO_OP_2CLK_MADD
+        * instruction.  All three program operands are used, each passed
+        * through t_src().  This is the only translator in this part of
+        * the file that passes GL_TRUE as the third PVS_OP_DST_OPERAND
+        * argument; presumably that marks a macro opcode -- confirm
+        * against the macro definition.  Returns inst unchanged.
+        */
+       inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
+                                    GL_FALSE,
+                                    GL_TRUE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = t_src(vp, &src[1]);
+       inst[3] = t_src(vp, &src[2]);
+
+       return inst;
+}
+
+static GLuint *r300TranslateOpcodeMAX(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* Translate MAX: fill inst[0..3] with one VE_MAXIMUM instruction
+        * taking src[0] and src[1] (both via t_src()) and writing the
+        * destination described by vpi->DstReg.  Returns inst unchanged.
+        */
+       inst[0] = PVS_OP_DST_OPERAND(VE_MAXIMUM,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = t_src(vp, &src[1]);
+       inst[3] = __CONST(1, SWIZZLE_ZERO);     /* third operand slot is not a program operand; __CONST is defined outside this chunk */
+
+       return inst;
+}
+
+static GLuint *r300TranslateOpcodeMIN(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* Translate MIN: fill inst[0..3] with one VE_MINIMUM instruction
+        * taking src[0] and src[1] (both via t_src()) and writing the
+        * destination described by vpi->DstReg.  Returns inst unchanged.
+        */
+       inst[0] = PVS_OP_DST_OPERAND(VE_MINIMUM,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = t_src(vp, &src[1]);
+       inst[3] = __CONST(1, SWIZZLE_ZERO);     /* third operand slot is not a program operand; __CONST is defined outside this chunk */
+
+       return inst;
+}
+
+static GLuint *r300TranslateOpcodeMOV(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* Translate MOV: there is no dedicated move opcode used here; the
+        * copy is emitted as a VE_ADD whose first operand is src[0] (via
+        * t_src()) and whose remaining operands are
+        * __CONST(0, SWIZZLE_ZERO) -- presumably src[0] + 0, confirm
+        * against the __CONST definition.  Fills inst[0..3] and returns
+        * inst unchanged.
+        */
+       //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
+
+       inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = __CONST(0, SWIZZLE_ZERO);
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+       return inst;
+}
+
+static GLuint *r300TranslateOpcodeMUL(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* Translate MUL: fill inst[0..3] with one VE_MULTIPLY instruction
+        * taking src[0] and src[1] (both via t_src()) and writing the
+        * destination described by vpi->DstReg.  Returns inst unchanged.
+        */
+       inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = t_src(vp, &src[1]);
+       inst[3] = __CONST(1, SWIZZLE_ZERO);     /* third operand slot is not a program operand; __CONST is defined outside this chunk */
+
+       return inst;
+}
+
+static GLuint *r300TranslateOpcodePOW(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* Translate POW: fill inst[0..3] with one ME_POWER_FUNC_FF
+        * instruction.  Both program operands go through t_src_scalar().
+        * Note the operand placement: src[0] lands in word 1 and src[1] in
+        * word 3, with word 2 set to __CONST(0, SWIZZLE_ZERO).
+        * Returns inst unchanged.
+        */
+       inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
+                                    GL_TRUE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src_scalar(vp, &src[0]);
+       inst[2] = __CONST(0, SWIZZLE_ZERO);
+       inst[3] = t_src_scalar(vp, &src[1]);
+
+       return inst;
+}
+
+static GLuint *r300TranslateOpcodeRCP(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* Translate RCP: fill inst[0..3] with one ME_RECIP_DX instruction.
+        * The destination comes from vpi->DstReg and the single operand is
+        * src[0] passed through t_src_scalar().  Returns inst unchanged.
+        */
+       inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_DX,
+                                    GL_TRUE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src_scalar(vp, &src[0]);
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* __CONST is defined outside this chunk; presumably a zero filler operand -- confirm */
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+       return inst;
+}
+
+static GLuint *r300TranslateOpcodeRSQ(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* Translate RSQ: fill inst[0..3] with one ME_RECIP_SQRT_DX
+        * instruction.  The destination comes from vpi->DstReg and the
+        * single operand is src[0] passed through t_src_scalar().
+        * Returns inst unchanged.
+        */
+       inst[0] = PVS_OP_DST_OPERAND(ME_RECIP_SQRT_DX,
+                                    GL_TRUE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src_scalar(vp, &src[0]);
+       inst[2] = __CONST(0, SWIZZLE_ZERO);     /* __CONST is defined outside this chunk; presumably a zero filler operand -- confirm */
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+       return inst;
+}
+
+static GLuint *r300TranslateOpcodeSGE(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* Translate SGE: fill inst[0..3] with one
+        * VE_SET_GREATER_THAN_EQUAL instruction taking src[0] and src[1]
+        * (both via t_src()) and writing the destination described by
+        * vpi->DstReg.  Returns inst unchanged.
+        */
+       inst[0] = PVS_OP_DST_OPERAND(VE_SET_GREATER_THAN_EQUAL,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = t_src(vp, &src[1]);
+       inst[3] = __CONST(1, SWIZZLE_ZERO);     /* third operand slot is not a program operand; __CONST is defined outside this chunk */
+
+       return inst;
+}
+
+static GLuint *r300TranslateOpcodeSLT(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* Translate SLT: fill inst[0..3] with one VE_SET_LESS_THAN
+        * instruction taking src[0] and src[1] (both via t_src()) and
+        * writing the destination described by vpi->DstReg.
+        * Returns inst unchanged.
+        */
+       inst[0] = PVS_OP_DST_OPERAND(VE_SET_LESS_THAN,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = t_src(vp, &src[1]);
+       inst[3] = __CONST(1, SWIZZLE_ZERO);     /* third operand slot is not a program operand; __CONST is defined outside this chunk */
+
+       return inst;
+}
+
+static GLuint *r300TranslateOpcodeSUB(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* Translate SUB: fill inst[0..3] with a single instruction and
+        * return inst unchanged.  Two encodings are present:
+        *   - the disabled (#if 0) one uses VE_ADD with src[1]'s negate
+        *     flag inverted;
+        *   - the compiled one uses VE_MULTIPLY_ADD with operands src[0],
+        *     __CONST(0, SWIZZLE_ONE) and src[1] with its negate flag
+        *     inverted -- presumably src[0] * 1 + (-src[1]); confirm
+        *     against the __CONST definition.
+        * In both, (!src[1].Negate) flips the operand's own negate flag,
+        * so src[1] is flagged for negation exactly when the program did
+        * not already negate it.
+        */
+       //ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
+
+#if 0
+       inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
+                                 t_src_class(src[1].File),
+                                 (!src[1].
+                                  Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[1].RelAddr << 4);
+       inst[3] = 0;
+#else
+       inst[0] =
+           PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+                              GL_FALSE,
+                              GL_FALSE,
+                              t_dst_index(vp, &vpi->DstReg),
+                              t_dst_mask(vpi->DstReg.WriteMask),
+                              t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = __CONST(0, SWIZZLE_ONE);
+       inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
+                                 t_src_class(src[1].File),
+                                 (!src[1].
+                                  Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[1].RelAddr << 4);
+#endif
+
+       return inst;
+}
+
+static GLuint *r300TranslateOpcodeSWZ(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3])
+{      /* Translate SWZ: emitted exactly like r300TranslateOpcodeMOV --
+        * a VE_ADD whose first operand is src[0] via t_src() and whose
+        * other two operands are __CONST(0, SWIZZLE_ZERO).  Any swizzle
+        * is therefore whatever t_src() encodes for src[0].  Fills
+        * inst[0..3] and returns inst unchanged.
+        */
+       //ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
+
+       inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = t_src(vp, &src[0]);
+       inst[2] = __CONST(0, SWIZZLE_ZERO);
+       inst[3] = __CONST(0, SWIZZLE_ZERO);
+
+       return inst;
+}
+
+static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi,
+                                     GLuint * inst,
+                                     struct prog_src_register src[3],
+                                     int *u_temp_i)
+{      /* Translate XPD (cross product) as two VE_MULTIPLY_ADD
+        * instructions occupying eight consecutive words:
+        *   1. temp[*u_temp_i] = src0.yzxw * src1.zxyw + __CONST(1, ZERO)
+        *   2. dst = src1.yzxw (negate flag inverted) * src0.zxyw + temp
+        * The yzxw/zxyw reordering is applied on top of each operand's
+        * own Swizzle through GET_SWZ().
+        *
+        * *u_temp_i is read as the temporary index and decremented before
+        * returning.  The returned pointer is inst + 4, i.e. it addresses
+        * the second instruction written, not the first.
+        */
+       /* mul r0, r1.yzxw, r2.zxyw
+          mad r0, -r2.yzxw, r1.zxyw, r0
+        */
+
+       inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    *u_temp_i,
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    PVS_DST_REG_TEMPORARY);
+       inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 1)),      // Y
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 2)),        // Z
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),        // X
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),        // W
+                                 t_src_class(src[0].File),
+                                 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[0].RelAddr << 4);
+       inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 2)),      // Z
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),        // X
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 1)),        // Y
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),        // W
+                                 t_src_class(src[1].File),
+                                 src[1].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[1].RelAddr << 4);
+       inst[3] = __CONST(1, SWIZZLE_ZERO);
+       inst += 4;      /* the second instruction goes into the next four words */
+
+       inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+                                    GL_FALSE,
+                                    GL_FALSE,
+                                    t_dst_index(vp, &vpi->DstReg),
+                                    t_dst_mask(vpi->DstReg.WriteMask),
+                                    t_dst_class(vpi->DstReg.File));
+       inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &src[1]), t_swizzle(GET_SWZ(src[1].Swizzle, 1)),      // Y
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 2)),        // Z
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 0)),        // X
+                                 t_swizzle(GET_SWZ(src[1].Swizzle, 3)),        // W
+                                 t_src_class(src[1].File),
+                                 (!src[1].
+                                  Negate) ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[1].RelAddr << 4);
+       inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &src[0]), t_swizzle(GET_SWZ(src[0].Swizzle, 2)),      // Z
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 0)),        // X
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 1)),        // Y
+                                 t_swizzle(GET_SWZ(src[0].Swizzle, 3)),        // W
+                                 t_src_class(src[0].File),
+                                 src[0].Negate ? VSF_FLAG_ALL : VSF_FLAG_NONE) |
+           (src[0].RelAddr << 4);
+       inst[3] =
+           PVS_SRC_OPERAND(*u_temp_i, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
+                           PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
+                           PVS_SRC_REG_TEMPORARY, VSF_FLAG_NONE);
+
+       (*u_temp_i)--;  /* step the caller's temp counter down past the temp used above */
+
+       return inst;
+}
+
+static void t_inputs_outputs(struct r300_vertex_program *vp)
+{      /* Build the register maps vp->inputs[] and vp->outputs[] from
+        * vp->key.InputsRead and vp->key.OutputsWritten.
+        *
+        * Inputs: each attribute whose bit is set gets the next
+        * consecutive slot, starting at 0; every other entry becomes -1.
+        *
+        * Outputs: every entry is first reset to -1, then slots are handed
+        * out in the fixed order HPOS, PSIZ, COL0, COL1, BFC0, BFC1,
+        * TEX0..TEX7, FOGC.  When either back-face color is written, a
+        * slot is skipped for each of the four color outputs the program
+        * does not write.
+        */
+       int i;
+       int cur_reg;
+
+       cur_reg = -1;   /* pre-incremented below, so the first attribute read gets slot 0 */
+       for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+               if (vp->key.InputsRead & (1 << i))      /* NOTE(review): signed shift; if i can reach 31 this should be 1U << i -- confirm VERT_ATTRIB_MAX */
+                       vp->inputs[i] = ++cur_reg;
+               else
+                       vp->inputs[i] = -1;
+       }
+
+       cur_reg = 0;    /* output slots are numbered independently of input slots */
+       for (i = 0; i < VERT_RESULT_MAX; i++)
                vp->outputs[i] = -1;
 
        assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
 
-       /* Assign outputs */
-       if(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS))
+       if (vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS)) {
                vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
+       }
 
-       if(vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ))
+       if (vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
                vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
+       }
 
-       if(vp->key.OutputsWritten & (1 << VERT_RESULT_COL0))
+       /* If we're writing back facing colors we need to send
+        * four colors to make front/back face colors selection work.
+        * If the vertex program doesn't write all 4 colors, let's
+        * pretend it does by skipping output index reg so the colors
+        * get written into appropriate output vectors.
+        */
+       if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) {
                vp->outputs[VERT_RESULT_COL0] = cur_reg++;
+       } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+               vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+               cur_reg++;      /* leave a gap where COL0 would have been */
+       }
 
-       if(vp->key.OutputsWritten & (1 << VERT_RESULT_COL1))
+       if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) {
                vp->outputs[VERT_RESULT_COL1] = cur_reg++;
+       } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+               vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+               cur_reg++;      /* leave a gap where COL1 would have been */
+       }
 
-#if 0 /* Not supported yet */
-       if(vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0))
+       if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
                vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
+       } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+               cur_reg++;      /* only BFC1 is written: leave a gap where BFC0 would have been */
+       }
 
-       if(vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1))
+       if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) {
                vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
+       } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+               cur_reg++;      /* only BFC0 is written: leave a gap where BFC1 would have been */
+       }
+
+       for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {        /* texture coordinates follow the colors, with no gaps for unwritten units */
+               if (vp->key.OutputsWritten & (1 << i)) {
+                       vp->outputs[i] = cur_reg++;
+               }
+       }
 
-       if(vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC))
+       if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) {
                vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
-#endif
+       }
+}
 
-       for(i=VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++)
-               if(vp->key.OutputsWritten & (1 << i))
-                       vp->outputs[i] = cur_reg++;
+static void r300TranslateVertexShader(struct r300_vertex_program *vp,
+                                     struct prog_instruction *vpi)
+{
+       int i;
+       GLuint *inst;
+       unsigned long num_operands;
+       /* Initial value should be last tmp reg that hw supports.
+          Strangely enough r300 doesnt mind even though these would be out of range.
+          Smart enough to realize that it doesnt need it? */
+       int u_temp_i = VSF_MAX_FRAGMENT_TEMPS - 1;
+       struct prog_src_register src[3];
 
+       vp->pos_end = 0;        /* Not supported yet */
+       vp->hw_code.length = 0;
        vp->translated = GL_TRUE;
-       vp->native = GL_TRUE;
+       vp->error = GL_FALSE;
+
+       t_inputs_outputs(vp);
+
+       for (inst = vp->hw_code.body.d; vpi->Opcode != OPCODE_END;
+            vpi++, inst += 4) {
 
-       o_inst=vp->program.body.i;
-       for(; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
                FREE_TEMPS();
 
-               if(!valid_dst(vp, &vpi->DstReg))
-               {
+               if (!valid_dst(vp, &vpi->DstReg)) {
                        /* redirect result to unused temp */
                        vpi->DstReg.File = PROGRAM_TEMPORARY;
                        vpi->DstReg.Index = u_temp_i;
                }
 
-               operands=op_operands(vpi->Opcode);
-               are_srcs_scalar=operands & SCALAR_FLAG;
-               operands &= OP_MASK;
+               num_operands = _mesa_num_inst_src_regs(vpi->Opcode);
 
-               for(i=0; i < operands; i++)
-                       src[i]=vpi->SrcReg[i];
-
-               if(operands == 3){ /* TODO: scalars */
-                       if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
-                               o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
-                                               VSF_FLAG_ALL, VSF_OUT_CLASS_TMP);
-
-                               o_inst->src[0]=MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
-                                               SWIZZLE_X, SWIZZLE_Y,
-                                               SWIZZLE_Z, SWIZZLE_W,
-                                               t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
-
-                               o_inst->src[1]=ZERO_SRC_2;
-                               o_inst->src[2]=ZERO_SRC_2;
-                               o_inst++;
+               /* copy the sources (src) from mesa into a local variable... is this needed? */
+               for (i = 0; i < num_operands; i++) {
+                       src[i] = vpi->SrcReg[i];
+               }
 
-                               src[2].File=PROGRAM_TEMPORARY;
-                               src[2].Index=u_temp_i;
-                               src[2].RelAddr=0;
+               if (num_operands == 3) {        /* TODO: scalars */
+                       if (CMP_SRCS(src[1], src[2])
+                           || CMP_SRCS(src[0], src[2])) {
+                               inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+                                                            GL_FALSE,
+                                                            GL_FALSE,
+                                                            u_temp_i,
+                                                            VSF_FLAG_ALL,
+                                                            PVS_DST_REG_TEMPORARY);
+                               inst[1] =
+                                   PVS_SRC_OPERAND(t_src_index(vp, &src[2]),
+                                                   SWIZZLE_X,
+                                                   SWIZZLE_Y,
+                                                   SWIZZLE_Z,
+                                                   SWIZZLE_W,
+                                                   t_src_class(src[2].File),
+                                                   VSF_FLAG_NONE) | (src[2].
+                                                                     RelAddr <<
+                                                                     4);
+                               inst[2] = __CONST(2, SWIZZLE_ZERO);
+                               inst[3] = __CONST(2, SWIZZLE_ZERO);
+                               inst += 4;
+
+                               src[2].File = PROGRAM_TEMPORARY;
+                               src[2].Index = u_temp_i;
+                               src[2].RelAddr = 0;
                                u_temp_i--;
                        }
-
                }
 
-               if(operands >= 2){
-                       if( CMP_SRCS(src[1], src[0]) ){
-                               o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
-                                               VSF_FLAG_ALL, VSF_OUT_CLASS_TMP);
-
-                               o_inst->src[0]=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
-                                               SWIZZLE_X, SWIZZLE_Y,
-                                               SWIZZLE_Z, SWIZZLE_W,
-                                               t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
-
-                               o_inst->src[1]=ZERO_SRC_0;
-                               o_inst->src[2]=ZERO_SRC_0;
-                               o_inst++;
-
-                               src[0].File=PROGRAM_TEMPORARY;
-                               src[0].Index=u_temp_i;
-                               src[0].RelAddr=0;
+               if (num_operands >= 2) {
+                       if (CMP_SRCS(src[1], src[0])) {
+                               inst[0] = PVS_OP_DST_OPERAND(VE_ADD,
+                                                            GL_FALSE,
+                                                            GL_FALSE,
+                                                            u_temp_i,
+                                                            VSF_FLAG_ALL,
+                                                            PVS_DST_REG_TEMPORARY);
+                               inst[1] =
+                                   PVS_SRC_OPERAND(t_src_index(vp, &src[0]),
+                                                   SWIZZLE_X,
+                                                   SWIZZLE_Y,
+                                                   SWIZZLE_Z,
+                                                   SWIZZLE_W,
+                                                   t_src_class(src[0].File),
+                                                   VSF_FLAG_NONE) | (src[0].
+                                                                     RelAddr <<
+                                                                     4);
+                               inst[2] = __CONST(0, SWIZZLE_ZERO);
+                               inst[3] = __CONST(0, SWIZZLE_ZERO);
+                               inst += 4;
+
+                               src[0].File = PROGRAM_TEMPORARY;
+                               src[0].Index = u_temp_i;
+                               src[0].RelAddr = 0;
                                u_temp_i--;
                        }
                }
 
-               /* These ops need special handling. */
-               switch(vpi->Opcode){
-               case OPCODE_POW:
-                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_POW, t_dst_index(vp, &vpi->DstReg),
-                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
-                       o_inst->src[0]=t_src_scalar(vp, &src[0]);
-                       o_inst->src[1]=ZERO_SRC_0;
-                       o_inst->src[2]=t_src_scalar(vp, &src[1]);
-                       goto next;
-
-               case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO}
-               case OPCODE_SWZ:
-#if 1
-                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
-                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
-                       o_inst->src[0]=t_src(vp, &src[0]);
-                       o_inst->src[1]=ZERO_SRC_0;
-                       o_inst->src[2]=ZERO_SRC_0;
-#else
-                       hw_op=(src[0].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
-
-                       o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
-                               t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
-                       o_inst->src[0]=t_src(vp, &src[0]);
-                       o_inst->src[1]=ONE_SRC_0;
-                       o_inst->src[2]=ZERO_SRC_0;
-#endif
-
-                       goto next;
-
+               switch (vpi->Opcode) {
+               case OPCODE_ABS:
+                       inst = r300TranslateOpcodeABS(vp, vpi, inst, src);
+                       break;
                case OPCODE_ADD:
-#if 1
-                       hw_op=(src[0].File == PROGRAM_TEMPORARY &&
-                               src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
-
-                       o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
-                               t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
-                       o_inst->src[0]=ONE_SRC_0;
-                       o_inst->src[1]=t_src(vp, &src[0]);
-                       o_inst->src[2]=t_src(vp, &src[1]);
-#else
-                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
-                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
-                       o_inst->src[0]=t_src(vp, &src[0]);
-                       o_inst->src[1]=t_src(vp, &src[1]);
-                       o_inst->src[2]=ZERO_SRC_1;
-
-#endif
-                       goto next;
-
-               case OPCODE_MAD:
-                       hw_op=(src[0].File == PROGRAM_TEMPORARY &&
-                               src[1].File == PROGRAM_TEMPORARY &&
-                               src[2].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
-
-                       o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
-                               t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
-                       o_inst->src[0]=t_src(vp, &src[0]);
-                       o_inst->src[1]=t_src(vp, &src[1]);
-                       o_inst->src[2]=t_src(vp, &src[2]);
-                       goto next;
-
-               case OPCODE_MUL: /* HW mul can take third arg but appears to have some other limitations. */
-                       hw_op=(src[0].File == PROGRAM_TEMPORARY &&
-                               src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
-
-                       o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
-                               t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
-                       o_inst->src[0]=t_src(vp, &src[0]);
-                       o_inst->src[1]=t_src(vp, &src[1]);
-
-                       o_inst->src[2]=ZERO_SRC_1;
-                       goto next;
-
-               case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO}
-                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg),
-                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
-
-                       o_inst->src[0]=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
-                                       SWIZZLE_ZERO,
-                                       t_src_class(src[0].File),
-                                       src[0].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
-
-                       o_inst->src[1]=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
-                                       t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
-                                       t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
-                                       t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
-                                       SWIZZLE_ZERO,
-                                       t_src_class(src[1].File),
-                                       src[1].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
-
-                       o_inst->src[2]=ZERO_SRC_1;
-                       goto next;
-
-               case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
-#if 1
-                       hw_op=(src[0].File == PROGRAM_TEMPORARY &&
-                               src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
-
-                       o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
-                               t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
-                       o_inst->src[0]=t_src(vp, &src[0]);
-                       o_inst->src[1]=ONE_SRC_0;
-                       o_inst->src[2]=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
-                                       t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
-                                       t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
-                                       t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
-                                       t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
-                                       t_src_class(src[1].File),
-                                       (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE)  | (src[1].RelAddr << 4);
-#else
-                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
-                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
-
-                       o_inst->src[0]=t_src(vp, &src[0]);
-                       o_inst->src[1]=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
-                                       t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
-                                       t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
-                                       t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
-                                       t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
-                                       t_src_class(src[1].File),
-                                       (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
-                       o_inst->src[2]=0;
-#endif
-                       goto next;
-
-               case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
-                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, t_dst_index(vp, &vpi->DstReg),
-                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
-
-                       o_inst->src[0]=t_src(vp, &src[0]);
-                       o_inst->src[1]=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
-                                       t_src_class(src[0].File),
-                                       (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
-                       o_inst->src[2]=0;
-                       goto next;
-
+                       inst = r300TranslateOpcodeADD(vp, vpi, inst, src);
+                       break;
+               case OPCODE_ARL:
+                       inst = r300TranslateOpcodeARL(vp, vpi, inst, src);
+                       break;
+               case OPCODE_DP3:
+                       inst = r300TranslateOpcodeDP3(vp, vpi, inst, src);
+                       break;
+               case OPCODE_DP4:
+                       inst = r300TranslateOpcodeDP4(vp, vpi, inst, src);
+                       break;
+               case OPCODE_DPH:
+                       inst = r300TranslateOpcodeDPH(vp, vpi, inst, src);
+                       break;
+               case OPCODE_DST:
+                       inst = r300TranslateOpcodeDST(vp, vpi, inst, src);
+                       break;
+               case OPCODE_EX2:
+                       inst = r300TranslateOpcodeEX2(vp, vpi, inst, src);
+                       break;
+               case OPCODE_EXP:
+                       inst = r300TranslateOpcodeEXP(vp, vpi, inst, src);
+                       break;
                case OPCODE_FLR:
-               /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W}
-                  ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
-
-                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, u_temp_i,
-                                       t_dst_mask(vpi->DstReg.WriteMask), VSF_OUT_CLASS_TMP);
-
-                       o_inst->src[0]=t_src(vp, &src[0]);
-                       o_inst->src[1]=ZERO_SRC_0;
-                       o_inst->src[2]=ZERO_SRC_0;
-                       o_inst++;
-
-                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
-                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
-
-                       o_inst->src[0]=t_src(vp, &src[0]);
-                       o_inst->src[1]=MAKE_VSF_SOURCE(u_temp_i,
-                                       VSF_IN_COMPONENT_X,
-                                       VSF_IN_COMPONENT_Y,
-                                       VSF_IN_COMPONENT_Z,
-                                       VSF_IN_COMPONENT_W,
-                                       VSF_IN_CLASS_TMP,
-                                       /* Not 100% sure about this */
-                                       (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
-
-                       o_inst->src[2]=ZERO_SRC_0;
-                       u_temp_i--;
-                       goto next;
-
-               case OPCODE_LG2:// LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
-                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_LG2, t_dst_index(vp, &vpi->DstReg),
-                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
-
-                       o_inst->src[0]=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                                       t_src_class(src[0].File),
-                                       src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
-                       o_inst->src[1]=ZERO_SRC_0;
-                       o_inst->src[2]=ZERO_SRC_0;
-                       goto next;
-
-               case OPCODE_LIT://LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
-                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_LIT, t_dst_index(vp, &vpi->DstReg),
-                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
-                       /* NOTE: Users swizzling might not work. */
-                       o_inst->src[0]=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
-                                       VSF_IN_COMPONENT_ZERO, // z
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
-                                       t_src_class(src[0].File),
-                                       src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
-                       o_inst->src[1]=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
-                                       VSF_IN_COMPONENT_ZERO, // z
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
-                                       t_src_class(src[0].File),
-                                       src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
-                       o_inst->src[2]=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
-                                       VSF_IN_COMPONENT_ZERO, // z
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
-                                       t_src_class(src[0].File),
-                                       src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
-                       goto next;
-
-               case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W}
-                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg),
-                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
-
-                       o_inst->src[0]=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
-                                       VSF_IN_COMPONENT_ONE,
-                                       t_src_class(src[0].File),
-                                       src[0].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
-                       o_inst->src[1]=t_src(vp, &src[1]);
-                       o_inst->src[2]=ZERO_SRC_1;
-                       goto next;
-
+                       inst = r300TranslateOpcodeFLR(vp, vpi, inst, src,       /* FIXME */
+                                                     &u_temp_i);
+                       break;
+               case OPCODE_FRC:
+                       inst = r300TranslateOpcodeFRC(vp, vpi, inst, src);
+                       break;
+               case OPCODE_LG2:
+                       inst = r300TranslateOpcodeLG2(vp, vpi, inst, src);
+                       break;
+               case OPCODE_LIT:
+                       inst = r300TranslateOpcodeLIT(vp, vpi, inst, src);
+                       break;
+               case OPCODE_LOG:
+                       inst = r300TranslateOpcodeLOG(vp, vpi, inst, src);
+                       break;
+               case OPCODE_MAD:
+                       inst = r300TranslateOpcodeMAD(vp, vpi, inst, src);
+                       break;
+               case OPCODE_MAX:
+                       inst = r300TranslateOpcodeMAX(vp, vpi, inst, src);
+                       break;
+               case OPCODE_MIN:
+                       inst = r300TranslateOpcodeMIN(vp, vpi, inst, src);
+                       break;
+               case OPCODE_MOV:
+                       inst = r300TranslateOpcodeMOV(vp, vpi, inst, src);
+                       break;
+               case OPCODE_MUL:
+                       inst = r300TranslateOpcodeMUL(vp, vpi, inst, src);
+                       break;
+               case OPCODE_POW:
+                       inst = r300TranslateOpcodePOW(vp, vpi, inst, src);
+                       break;
+               case OPCODE_RCP:
+                       inst = r300TranslateOpcodeRCP(vp, vpi, inst, src);
+                       break;
+               case OPCODE_RSQ:
+                       inst = r300TranslateOpcodeRSQ(vp, vpi, inst, src);
+                       break;
+               case OPCODE_SGE:
+                       inst = r300TranslateOpcodeSGE(vp, vpi, inst, src);
+                       break;
+               case OPCODE_SLT:
+                       inst = r300TranslateOpcodeSLT(vp, vpi, inst, src);
+                       break;
+               case OPCODE_SUB:
+                       inst = r300TranslateOpcodeSUB(vp, vpi, inst, src);
+                       break;
+               case OPCODE_SWZ:
+                       inst = r300TranslateOpcodeSWZ(vp, vpi, inst, src);
+                       break;
                case OPCODE_XPD:
-                       /* mul r0, r1.yzxw, r2.zxyw
-                          mad r0, -r2.yzxw, r1.zxyw, r0
-                          NOTE: might need MAD_2
-                        */
-
-                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, u_temp_i,
-                                       t_dst_mask(vpi->DstReg.WriteMask), VSF_OUT_CLASS_TMP);
-
-                       o_inst->src[0]=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
-                                       t_src_class(src[0].File),
-                                       src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
-
-                       o_inst->src[1]=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
-                                       t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
-                                       t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
-                                       t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
-                                       t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
-                                       t_src_class(src[1].File),
-                                       src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
-
-                       o_inst->src[2]=ZERO_SRC_1;
-                       o_inst++;
-                       u_temp_i--;
-
-                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, t_dst_index(vp, &vpi->DstReg),
-                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
-
-                       o_inst->src[0]=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
-                                       t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
-                                       t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
-                                       t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
-                                       t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
-                                       t_src_class(src[1].File),
-                                       (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
-
-                       o_inst->src[1]=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
-                                       t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
-                                       t_src_class(src[0].File),
-                                       src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
-
-                       o_inst->src[2]=MAKE_VSF_SOURCE(u_temp_i+1,
-                                       VSF_IN_COMPONENT_X,
-                                       VSF_IN_COMPONENT_Y,
-                                       VSF_IN_COMPONENT_Z,
-                                       VSF_IN_COMPONENT_W,
-                                       VSF_IN_CLASS_TMP,
-                                       VSF_FLAG_NONE);
-
-                       goto next;
-
-               case OPCODE_RCC:
-                       fprintf(stderr, "Dont know how to handle op %d yet\n", vpi->Opcode);
-                       _mesa_exit(-1);
-               break;
-               case OPCODE_END:
+                       inst = r300TranslateOpcodeXPD(vp, vpi, inst, src,       /* FIXME */
+                                                     &u_temp_i);
                        break;
                default:
+                       vp->error = GL_TRUE;
                        break;
                }
-
-               o_inst->op=MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst_index(vp, &vpi->DstReg),
-                               t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
-
-               if(are_srcs_scalar){
-                       switch(operands){
-                               case 1:
-                                       o_inst->src[0]=t_src_scalar(vp, &src[0]);
-                                       o_inst->src[1]=ZERO_SRC_0;
-                                       o_inst->src[2]=ZERO_SRC_0;
-                               break;
-
-                               case 2:
-                                       o_inst->src[0]=t_src_scalar(vp, &src[0]);
-                                       o_inst->src[1]=t_src_scalar(vp, &src[1]);
-                                       o_inst->src[2]=ZERO_SRC_1;
-                               break;
-
-                               case 3:
-                                       o_inst->src[0]=t_src_scalar(vp, &src[0]);
-                                       o_inst->src[1]=t_src_scalar(vp, &src[1]);
-                                       o_inst->src[2]=t_src_scalar(vp, &src[2]);
-                               break;
-
-                               default:
-                                       fprintf(stderr, "scalars and op RCC not handled yet");
-                                       _mesa_exit(-1);
-                               break;
-                       }
-               }else{
-                       switch(operands){
-                               case 1:
-                                       o_inst->src[0]=t_src(vp, &src[0]);
-                                       o_inst->src[1]=ZERO_SRC_0;
-                                       o_inst->src[2]=ZERO_SRC_0;
-                               break;
-
-                               case 2:
-                                       o_inst->src[0]=t_src(vp, &src[0]);
-                                       o_inst->src[1]=t_src(vp, &src[1]);
-                                       o_inst->src[2]=ZERO_SRC_1;
-                               break;
-
-                               case 3:
-                                       o_inst->src[0]=t_src(vp, &src[0]);
-                                       o_inst->src[1]=t_src(vp, &src[1]);
-                                       o_inst->src[2]=t_src(vp, &src[2]);
-                               break;
-
-                               default:
-                                       fprintf(stderr, "scalars and op RCC not handled yet");
-                                       _mesa_exit(-1);
-                               break;
-                       }
-               }
-               next: ;
        }
 
-       /* Will most likely segfault before we get here... fix later. */
-       if(o_inst - vp->program.body.i >= VSF_MAX_FRAGMENT_LENGTH/4) {
-               vp->program.length = 0;
-               vp->native = GL_FALSE;
-               return ;
+       vp->hw_code.length = (inst - vp->hw_code.body.d);
+       if (vp->hw_code.length >= VSF_MAX_FRAGMENT_LENGTH) {
+               vp->error = GL_TRUE;
        }
-       vp->program.length=(o_inst - vp->program.body.i) * 4;
-#if 0
-       fprintf(stderr, "hw program:\n");
-       for(i=0; i < vp->program.length; i++)
-               fprintf(stderr, "%08x\n", vp->program.body.d[i]);
-#endif
 }
 
+/* DP4 version seems to trigger some hw peculiarity */
+//#define PREFER_DP4
+
 static void position_invariant(struct gl_program *prog)
 {
        struct prog_instruction *vpi;
@@ -883,20 +1202,20 @@ static void position_invariant(struct gl_program *prog)
 
        gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 };
 
-        /* tokens[4] = matrix modifier */
+       /* tokens[4] = matrix modifier */
 #ifdef PREFER_DP4
-       tokens[4] = 0;  /* not transposed or inverted */
+       tokens[4] = 0;          /* not transposed or inverted */
 #else
        tokens[4] = STATE_MATRIX_TRANSPOSE;
 #endif
        paramList = prog->Parameters;
 
-       vpi = _mesa_alloc_instructions (prog->NumInstructions + 4);
-       _mesa_init_instructions (vpi, prog->NumInstructions + 4);
+       vpi = _mesa_alloc_instructions(prog->NumInstructions + 4);
+       _mesa_init_instructions(vpi, prog->NumInstructions + 4);
 
-       for (i=0; i < 4; i++) {
+       for (i = 0; i < 4; i++) {
                GLint idx;
-               tokens[2] = tokens[3] = i;  /* matrix row[i]..row[i] */
+               tokens[2] = tokens[3] = i;      /* matrix row[i]..row[i] */
                idx = _mesa_add_state_reference(paramList, tokens);
 #ifdef PREFER_DP4
                vpi[i].Opcode = OPCODE_DP4;
@@ -921,7 +1240,6 @@ static void position_invariant(struct gl_program *prog)
                else
                        vpi[i].Opcode = OPCODE_MAD;
 
-               vpi[i].StringPos = 0;
                vpi[i].Data = 0;
 
                if (i == 3)
@@ -948,34 +1266,35 @@ static void position_invariant(struct gl_program *prog)
 #endif
        }
 
-       _mesa_copy_instructions (&vpi[i], prog->Instructions, prog->NumInstructions);
+       _mesa_copy_instructions(&vpi[i], prog->Instructions,
+                               prog->NumInstructions);
 
        free(prog->Instructions);
 
        prog->Instructions = vpi;
 
        prog->NumInstructions += 4;
-       vpi = &prog->Instructions[prog->NumInstructions-1];
+       vpi = &prog->Instructions[prog->NumInstructions - 1];
 
        assert(vpi->Opcode == OPCODE_END);
 }
 
-static void insert_wpos(struct r300_vertex_program *vp,
-                      struct gl_program *prog,
-                      GLuint temp_index)
+static void insert_wpos(struct r300_vertex_program *vp, struct gl_program *prog,
+                       GLuint temp_index)
 {
        struct prog_instruction *vpi;
        struct prog_instruction *vpi_insert;
        int i = 0;
 
-       vpi = _mesa_alloc_instructions (prog->NumInstructions + 2);
-       _mesa_init_instructions (vpi, prog->NumInstructions + 2);
+       vpi = _mesa_alloc_instructions(prog->NumInstructions + 2);
+       _mesa_init_instructions(vpi, prog->NumInstructions + 2);
        /* all but END */
-       _mesa_copy_instructions (vpi, prog->Instructions, prog->NumInstructions - 1);
+       _mesa_copy_instructions(vpi, prog->Instructions,
+                               prog->NumInstructions - 1);
        /* END */
-       _mesa_copy_instructions (&vpi[prog->NumInstructions + 1],
-                                &prog->Instructions[prog->NumInstructions - 1],
-                                1);
+       _mesa_copy_instructions(&vpi[prog->NumInstructions + 1],
+                               &prog->Instructions[prog->NumInstructions - 1],
+                               1);
        vpi_insert = &vpi[prog->NumInstructions - 1];
 
        vpi_insert[i].Opcode = OPCODE_MOV;
@@ -993,7 +1312,7 @@ static void insert_wpos(struct r300_vertex_program *vp,
        vpi_insert[i].Opcode = OPCODE_MOV;
 
        vpi_insert[i].DstReg.File = PROGRAM_OUTPUT;
-       vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0+vp->wpos_idx;
+       vpi_insert[i].DstReg.Index = VERT_RESULT_TEX0 + vp->wpos_idx;
        vpi_insert[i].DstReg.WriteMask = WRITEMASK_XYZW;
        vpi_insert[i].DstReg.CondMask = COND_TR;
 
@@ -1007,7 +1326,7 @@ static void insert_wpos(struct r300_vertex_program *vp,
        prog->Instructions = vpi;
 
        prog->NumInstructions += i;
-       vpi = &prog->Instructions[prog->NumInstructions-1];
+       vpi = &prog->Instructions[prog->NumInstructions - 1];
 
        assert(vpi->Opcode == OPCODE_END);
 }
@@ -1020,9 +1339,9 @@ static void pos_as_texcoord(struct r300_vertex_program *vp,
        /* should do something else if no temps left... */
        prog->NumTemporaries++;
 
-       for(vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++){
-               if( vpi->DstReg.File == PROGRAM_OUTPUT &&
-                   vpi->DstReg.Index == VERT_RESULT_HPOS ){
+       for (vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++) {
+               if (vpi->DstReg.File == PROGRAM_OUTPUT
+                   && vpi->DstReg.Index == VERT_RESULT_HPOS) {
                        vpi->DstReg.File = PROGRAM_TEMPORARY;
                        vpi->DstReg.Index = tempregi;
                }
@@ -1030,34 +1349,84 @@ static void pos_as_texcoord(struct r300_vertex_program *vp,
        insert_wpos(vp, prog, tempregi);
 }
 
-static struct r300_vertex_program *build_program(struct r300_vertex_program_key *wanted_key,
-                                                struct gl_vertex_program *mesa_vp,
-                                                GLint wpos_idx)
+static struct r300_vertex_program *build_program(struct r300_vertex_program_key
+                                                *wanted_key, struct gl_vertex_program
+                                                *mesa_vp, GLint wpos_idx)      /* Builds a new r300_vertex_program for wanted_key; mesa_vp->Base is rewritten in place below. */
 {
        struct r300_vertex_program *vp;
 
        vp = _mesa_calloc(sizeof(*vp)); /* NOTE(review): result is used on the next line without a NULL check */
        _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
-
        vp->wpos_idx = wpos_idx;
 
-       if(mesa_vp->IsPositionInvariant) {
+       if (mesa_vp->IsPositionInvariant) {     /* position_invariant() prepends 4 instructions built from the MVP matrix rows */
                position_invariant(&mesa_vp->Base);
        }
 
-       if(wpos_idx > -1)
+       if (wpos_idx > -1) {    /* pos_as_texcoord() redirects HPOS writes to a newly reserved temporary */
                pos_as_texcoord(vp, &mesa_vp->Base);
+       }
 
-       assert(mesa_vp->Base.NumInstructions);
+       if (RADEON_DEBUG & DEBUG_VERTS) {
+               fprintf(stderr, "Vertex program after native rewrite:\n");
+               _mesa_print_program(&mesa_vp->Base);
+               fflush(stdout); /* NOTE(review): flushes stdout although the banner above goes to stderr; confirm where _mesa_print_program writes */
+       }
 
-       vp->num_temporaries=mesa_vp->Base.NumTemporaries;
+       /* Some outputs may be artificially added, to match the inputs of the fragment program.
+        * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by
+        * vertex program are undefined, so just use MOV [vertex_result], CONST[0]
+        */
+       {
+               int i, count = 0;       /* count: number of bits set in key.OutputsAdded */
+               for (i = 0; i < VERT_RESULT_MAX; ++i) {
+                       if (vp->key.OutputsAdded & (1 << i)) {
+                               ++count;
+                       }
+               }
+
+               if (count > 0) {
+                       struct prog_instruction *inst;
+
+                       _mesa_insert_instructions(&mesa_vp->Base, mesa_vp->Base.NumInstructions - 1, count);    /* presumably opens 'count' slots before the last instruction and grows NumInstructions; TODO confirm */
+                       inst = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions - 1 - count];  /* first inserted slot, assuming NumInstructions already includes them */
 
-       r300_translate_vertex_shader(vp, mesa_vp->Base.Instructions);
+                       for (i = 0; i < VERT_RESULT_MAX; ++i) {
+                               if (vp->key.OutputsAdded & (1 << i)) {
+                                       inst->Opcode = OPCODE_MOV;      /* one MOV output[i], CONST[0] per added output */
+
+                                       inst->DstReg.File = PROGRAM_OUTPUT;
+                                       inst->DstReg.Index = i;
+                                       inst->DstReg.WriteMask = WRITEMASK_XYZW;
+                                       inst->DstReg.CondMask = COND_TR;
+
+                                       inst->SrcReg[0].File = PROGRAM_CONSTANT;
+                                       inst->SrcReg[0].Index = 0;
+                                       inst->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+                                       ++inst;
+                               }
+                       }
+               }
+       }
+
+       assert(mesa_vp->Base.NumInstructions);  /* NOTE(review): checked only after NumInstructions - 1 was already used above when count > 0 */
+       vp->num_temporaries = mesa_vp->Base.NumTemporaries;
+       r300TranslateVertexShader(vp, mesa_vp->Base.Instructions);
 
        return vp;
 }
 
-void r300_select_vertex_shader(r300ContextPtr r300)
+static void add_outputs(struct r300_vertex_program_key *key, GLint vert)       /* Ensure output bit 'vert' is set in key->OutputsWritten, recording it in OutputsAdded if it was missing. */
+{
+       if (key->OutputsWritten & (1 << vert))  /* bit already set: nothing to add */
+               return;
+
+       key->OutputsWritten |= 1 << vert;
+       key->OutputsAdded |= 1 << vert; /* build_program() emits a MOV from CONST[0] for every bit set here */
+}
+
+void r300SelectVertexShader(r300ContextPtr r300) /*
+ * Choose the translated program for the current vertex program and store it
+ * in r300->selected_vp.
+ *
+ * A key is assembled from the vertex program's own inputs/outputs plus the
+ * outputs required by the inputs the current fragment program reads. If a
+ * program with a byte-identical key is already on vpc->progs it is reused;
+ * otherwise build_program() creates one, which is pushed onto the front of
+ * vpc->progs and selected.
+ */
 {
        GLcontext *ctx = ctx = r300->radeon.glCtx;
        GLuint InputsRead;
@@ -1068,52 +1437,137 @@ void r300_select_vertex_shader(r300ContextPtr r300)
        GLint wpos_idx;
 
        vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
+       wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead; /* key starts from what the vertex program itself reads... */
+       wanted_key.OutputsWritten = vpc->mesa_program.Base.OutputsWritten; /* ...and writes */
        InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
 
-       wanted_key.OutputsWritten |= 1 << VERT_RESULT_HPOS;
-
        wpos_idx = -1;
-       if (InputsRead & FRAG_BIT_WPOS){
+       if (InputsRead & FRAG_BIT_WPOS) { /* fragment program reads WPOS: look for a texcoord it does not read */
                for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
                        if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
                                break;
 
-               if(i == ctx->Const.MaxTextureUnits){
+               if (i == ctx->Const.MaxTextureUnits) { /* every texcoord is already read by the fragment program */
                        fprintf(stderr, "\tno free texcoord found\n");
                        _mesa_exit(-1);
                }
 
-               InputsRead |= (FRAG_BIT_TEX0 << i);
+               wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); /* mark that texcoord as written; its index becomes wpos_idx */
                wpos_idx = i;
        }
 
-       if (InputsRead & FRAG_BIT_COL0)
-               wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL0;
+       if (vpc->mesa_program.IsPositionInvariant) { /* POS read and HPOS written are set directly, without touching OutputsAdded */
+               wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
+               wanted_key.OutputsWritten |= (1 << VERT_RESULT_HPOS);
+       } else {
+               add_outputs(&wanted_key, VERT_RESULT_HPOS);
+       }
 
-       if ((InputsRead & FRAG_BIT_COL1) /*||
-           (InputsRead & FRAG_BIT_FOGC)*/)
-               wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL1;
+       if (InputsRead & FRAG_BIT_COL0) { /* from here on: each fragment input that is read requests the matching vertex output */
+               add_outputs(&wanted_key, VERT_RESULT_COL0);
+       }
 
-       for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
-               if (InputsRead & (FRAG_BIT_TEX0 << i))
-                       wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
+       if (InputsRead & FRAG_BIT_COL1) {
+               add_outputs(&wanted_key, VERT_RESULT_COL1);
+       }
 
-       wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
-       if(vpc->mesa_program.IsPositionInvariant) {
-               /* we wan't position don't we ? */
-               wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
+       if (InputsRead & FRAG_BIT_FOGC) {
+               add_outputs(&wanted_key, VERT_RESULT_FOGC);
+       }
+
+       for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+               if (InputsRead & (FRAG_BIT_TEX0 << i)) {
+                       add_outputs(&wanted_key, VERT_RESULT_TEX0 + i);
+               }
        }
 
        for (vp = vpc->progs; vp; vp = vp->next)
-               if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == 0) {
+               if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) /* whole-key byte comparison */
+                   == 0) {
                        r300->selected_vp = vp;
-                       return ;
+                       return; /* a program with this exact key already exists: reuse it */
                }
 
-       //_mesa_print_program(&vpc->mesa_program.Base);
+       if (RADEON_DEBUG & DEBUG_VERTS) { /* debug dump of the program before build_program() is called on it */
+               fprintf(stderr, "Initial vertex program:\n");
+               _mesa_print_program(&vpc->mesa_program.Base);
+               fflush(stdout); /* NOTE(review): the header line above goes to stderr; confirm stdout is the stream that needs flushing */
+       }
 
        vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx);
        vp->next = vpc->progs;
        vpc->progs = vp;
        r300->selected_vp = vp;
 }
+
+#define bump_vpu_count(ptr, new_count)   do { \
+               drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr)); \
+               int _nc=(new_count)/4; \
+               assert(_nc < 256); \
+               if(_nc>_p->vpu.count)_p->vpu.count=_nc; \
+       } while(0)
+
+/*
+ * Copy code->length dwords from code->body.d into one of the vertex
+ * program state atoms and raise that atom's vpu count to cover them.
+ *
+ * dest selects the target: bits 8..11 choose the atom (0 -> vpi, 2 -> vpp,
+ * 4 -> vps) and the low byte is a start offset that is multiplied by four
+ * to get a dword offset. Any other selector prints a message to stderr and
+ * calls _mesa_exit(-1).
+ *
+ * code->length must be positive and a multiple of four (asserted).
+ */
+static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_shader_hw_code *code)
+{
+       const int target = (dest >> 8) & 0xf;
+       const int base = 4 * (dest & 0xff);
+       int n;
+
+       assert((code->length > 0) && (code->length % 4 == 0));
+
+       if (target == 0) {
+               R300_STATECHANGE(r300, vpi);
+               for (n = 0; n < code->length; n++)
+                       r300->hw.vpi.cmd[R300_VPI_INSTR_0 + n + base] = (code->body.d[n]);
+               bump_vpu_count(r300->hw.vpi.cmd, code->length + base);
+       } else if (target == 2) {
+               R300_STATECHANGE(r300, vpp);
+               for (n = 0; n < code->length; n++)
+                       r300->hw.vpp.cmd[R300_VPP_PARAM_0 + n + base] = (code->body.d[n]);
+               bump_vpu_count(r300->hw.vpp.cmd, code->length + base);
+       } else if (target == 4) {
+               R300_STATECHANGE(r300, vps);
+               for (n = 0; n < code->length; n++)
+                       r300->hw.vps.cmd[1 + n + base] = (code->body.d[n]);
+               bump_vpu_count(r300->hw.vps.cmd, code->length + base);
+       } else {
+               fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest);
+               _mesa_exit(-1);
+       }
+}
+
+/*
+ * Load the selected vertex program (rmesa->selected_vp) into the hardware
+ * state atoms: refresh the parameters in the vpp atom, emit the translated
+ * code at R300_PVS_CODE_START, call r300VapCntl() with the number of inputs
+ * read, outputs written and temporaries, and fill in the three PVS_CNTL
+ * words of the pvs atom.
+ */
+void r300SetupVertexProgram(r300ContextPtr rmesa)
+{
+       GLcontext *gl = rmesa->radeon.glCtx;
+       struct r300_vertex_program *vp = rmesa->selected_vp;
+       drm_r300_cmd_header_t *hdr;
+       int param_dwords;
+       int const_count;
+       int last_inst;
+
+       /* Zero the counts first, in case one of the atoms is not used below */
+       hdr = (drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd;
+       hdr->vpu.count = 0;
+       hdr = (drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd;
+       hdr->vpu.count = 0;
+       hdr = (drm_r300_cmd_header_t *) rmesa->hw.vps.cmd;
+       hdr->vpu.count = 0;
+
+       /* Parameters are written straight into the vpp atom; the returned
+        * value divided by four is what goes into PVS_CNTL_2 below. */
+       R300_STATECHANGE(rmesa, vpp);
+       param_dwords = r300VertexProgUpdateParams(gl,
+                       (struct r300_vertex_program_cont *) gl->VertexProgram._Current,
+                       (float *) &rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
+       bump_vpu_count(rmesa->hw.vpp.cmd, param_dwords);
+       const_count = param_dwords / 4;
+
+       /* length / 4 - 1 is the value programmed into the *_INST fields */
+       r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &vp->hw_code);
+       last_inst = (vp->hw_code.length / 4) - 1;
+
+       r300VapCntl(rmesa,
+                   _mesa_bitcount(vp->key.InputsRead),
+                   _mesa_bitcount(vp->key.OutputsWritten),
+                   vp->num_temporaries);
+
+       R300_STATECHANGE(rmesa, pvs);
+       rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] =
+               (0 << R300_PVS_FIRST_INST_SHIFT) |
+               (last_inst << R300_PVS_XYZW_VALID_INST_SHIFT) |
+               (last_inst << R300_PVS_LAST_INST_SHIFT);
+       rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] =
+               (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
+               (const_count << R300_PVS_MAX_CONST_ADDR_SHIFT);
+       rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] =
+               (last_inst << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
+}