merge from master
authorBrian <brian@yutani.localnet.net>
Wed, 21 Mar 2007 17:57:30 +0000 (11:57 -0600)
committerBrian <brian@yutani.localnet.net>
Wed, 21 Mar 2007 17:57:30 +0000 (11:57 -0600)
25 files changed:
1  2 
docs/relnotes-6.5.3.html
src/mesa/drivers/dri/i915/i915_state.c
src/mesa/drivers/dri/i915/i915_vtbl.c
src/mesa/drivers/dri/i915tex/i915_state.c
src/mesa/drivers/dri/i965/brw_vs_tnl.c
src/mesa/drivers/dri/nouveau/nouveau_context.c
src/mesa/drivers/dri/nouveau/nouveau_shader.c
src/mesa/drivers/dri/nouveau/nouveau_shader_0.c
src/mesa/drivers/dri/nouveau/nouveau_shader_2.c
src/mesa/drivers/dri/r300/r300_fragprog.c
src/mesa/drivers/dri/r300/r300_state.c
src/mesa/drivers/dri/r300/r300_vertprog.c
src/mesa/main/colortab.c
src/mesa/main/get.c
src/mesa/main/get_gen.py
src/mesa/main/mtypes.h
src/mesa/main/pixel.c
src/mesa/shader/nvvertparse.c
src/mesa/shader/prog_instruction.c
src/mesa/shader/prog_instruction.h
src/mesa/shader/programopt.c
src/mesa/swrast/s_copypix.c
src/mesa/swrast/s_drawpix.c
src/mesa/swrast/s_readpix.c
src/mesa/tnl/t_vp_build.c

index 5bde4e00c0214e158512f4046924d34ba7c786e7,b3d2fe64556b5365ff5a8bc7e6d96910f2711aee..27053c071b825e20d2eaa62f113ed09a964e8fa2
@@@ -23,12 -24,8 +23,14 @@@ TB
  
  <h2>New features</h2>
  <ul>
 +<li>OpenGL 2.0 support.
 +<li>Entirely new Shading Language code generator.
 +<li>Much faster software execution of vertex, fragment shaders.
 +<li>New vertex buffer object infrastructure (replaces old array_cache code).
  <li>Updated glext.h file (version 39)
  <li>Updated glxext.h file (version 18)
++<li>GL_MAX_DRAWBUFFERS is now 4 (software rendering) so
++    "multiple render targets" are really supported.
  </ul>
  
  <h2>Bug fixes</h2>
@@@ -36,9 -33,7 +38,8 @@@
  <li>Fog was errantly applied when a fragment shader was enabled (bug 9346)
  <li>glPush/PopClientAttrib didn't handle VBO bindings correctly (bug 9445)
  <li>With 32-bit Z buffer, the fragment Z of lines and points was sometimes wrong.
- <li>GL_MAX_DRAWBUFFERS is now 4 (software rendering) so
-     "multiple render targets" are really supported.
 +<li>GL_POST_CONVOLUTION_ALPHA_BIAS/SCALE was broken.
 + <li>1D convolution state could affect 2D image transfers
  </ul>
  
  
Simple merge
index 21e961ce9cdcdd4d5a1c914fa74aa7d813b7a79b,dd1664bf33ab2965d318d6d64b6aa926e206b8f4..35adc4846a0744aa7d899f7a338daf65f0c7aa82
@@@ -894,7 -894,7 +893,7 @@@ static struct ureg calculate_light_atte
  
  
  /* Need to add some additional parameters to allow lighting in object
-- * space - STATE_SPOT_DIRECTION and STATE_HALF implicitly assume eye
++ * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye
   * space lighting.
   */
  static void build_lighting( struct tnl_program *p )
             */
            VPpli = register_param3(p, STATE_LIGHT, i, 
                                    STATE_POSITION_NORMALIZED); 
-           half = register_param3(p, STATE_LIGHT, i, STATE_HALF_VECTOR);
+             if (p->state->light_local_viewer) {
+                 struct ureg eye_hat = get_eye_position_normalized(p);
+                 half = get_temp(p);
+                 emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
+                 emit_normalize_vec3(p, half, half);
+             } else {
 -                half = register_param3(p, STATE_LIGHT, i, STATE_HALF);
++                half = register_param3(p, STATE_LIGHT, i, STATE_HALF_VECTOR);
+             }
         } 
         else {
            struct ureg Ppli = register_param3(p, STATE_LIGHT, i, 
index 0000000000000000000000000000000000000000,092ebb1140e31532885e6a79a9bd35765741c6f2..aa2f20127ba81df942fcdcd41d8843b8368409d6
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,1117 +1,1119 @@@
 -#include "nvvertexec.h"
+ /**************************************************************************
+ Copyright (C) 2005 Aapo Tahkola.
+ All Rights Reserved.
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ on the rights to use, copy, modify, merge, publish, distribute, sub
+ license, and/or sell copies of the Software, and to permit persons to whom
+ the Software is furnished to do so, subject to the following conditions:
+ The above copyright notice and this permission notice (including the next
+ paragraph) shall be included in all copies or substantial portions of the
+ Software.
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ USE OR OTHER DEALINGS IN THE SOFTWARE.
+ **************************************************************************/
+ /*
+  * Authors:
+  *   Aapo Tahkola <aet@rasterburn.org>
+  */
+ #include "glheader.h"
+ #include "macros.h"
+ #include "enums.h"
+ #include "program.h"
 -#include "program_instruction.h"
++#include "shader/prog_instruction.h"
++#include "shader/prog_parameter.h"
++#include "shader/prog_statevars.h"
++#include "tnl/tnl.h"
+ #include "r300_context.h"
+ #include "r300_program.h"
 -              _mesa_init_vp_per_primitive_registers(ctx);
+ #if SWIZZLE_X != VSF_IN_COMPONENT_X || \
+     SWIZZLE_Y != VSF_IN_COMPONENT_Y || \
+     SWIZZLE_Z != VSF_IN_COMPONENT_Z || \
+     SWIZZLE_W != VSF_IN_COMPONENT_W || \
+     SWIZZLE_ZERO != VSF_IN_COMPONENT_ZERO || \
+     SWIZZLE_ONE != VSF_IN_COMPONENT_ONE || \
+     WRITEMASK_X != VSF_FLAG_X || \
+     WRITEMASK_Y != VSF_FLAG_Y || \
+     WRITEMASK_Z != VSF_FLAG_Z || \
+     WRITEMASK_W != VSF_FLAG_W
+ #error Cannot change these!
+ #endif
+     
+ #define SCALAR_FLAG (1<<31) /* OR'ed into op_names[].ip; NOTE(review): 1<<31 shifts into the sign bit of a 32-bit int, 1u<<31 would avoid that */
+ #define FLAG_MASK (1<<31) /* same bit as SCALAR_FLAG; no use is visible in this part of the file */
+ #define OP_MASK       (0xf)  /* low bits of op_names[].ip hold the operand count; we are unlikely to have more than 15 */
+ #define OPN(operator, ip) {#operator, OPCODE_##operator, ip} /* one op_names[] entry: name string, OPCODE_* value, operand info */
+ static struct{
+       char *name;
+       int opcode;
+       unsigned long ip; /* number of input operands (OP_MASK bits), OR'ed with SCALAR_FLAG where set */
+ }op_names[]={
+       OPN(ABS, 1),
+       OPN(ADD, 2),
+       OPN(ARL, 1|SCALAR_FLAG),
+       OPN(DP3, 2),
+       OPN(DP4, 2),
+       OPN(DPH, 2),
+       OPN(DST, 2),
+       OPN(EX2, 1|SCALAR_FLAG),
+       OPN(EXP, 1|SCALAR_FLAG),
+       OPN(FLR, 1),
+       OPN(FRC, 1),
+       OPN(LG2, 1|SCALAR_FLAG),
+       OPN(LIT, 1),
+       OPN(LOG, 1|SCALAR_FLAG),
+       OPN(MAD, 3),
+       OPN(MAX, 2),
+       OPN(MIN, 2),
+       OPN(MOV, 1),
+       OPN(MUL, 2),
+       OPN(POW, 2|SCALAR_FLAG),
+       OPN(RCP, 1|SCALAR_FLAG),
+       OPN(RSQ, 1|SCALAR_FLAG),
+       OPN(SGE, 2),
+       OPN(SLT, 2),
+       OPN(SUB, 2),
+       OPN(SWZ, 1),
+       OPN(XPD, 2),
+       OPN(RCC, 0), // extra entries, listed with zero source operands
+       OPN(PRINT, 0),
+       OPN(END, 0),
+ };
+ #undef OPN
+       
+ int r300VertexProgUpdateParams(GLcontext *ctx, struct r300_vertex_program_cont *vp, float *dst)
+ {     /* Copy the vertex program's parameter values into dst, four floats
+        * per parameter, and return the number of floats written.
+        *
+        * NV programs copy all MAX_NV_VERTEX_PROGRAM_PARAMS entries of
+        * ctx->VertexProgram.Parameters.  Other programs first refresh their
+        * parameter list with _mesa_load_state_parameters() and then copy the
+        * state-var, named and constant entries.  The process exits if
+        * NumParameters * 4 exceeds VSF_MAX_FRAGMENT_LENGTH.
+        * NOTE(review): dst is assumed to be large enough for either path --
+        * no size is passed in; confirm against the callers. */
+       int pi;
+       struct gl_vertex_program *mesa_vp = &vp->mesa_program;
+       float *dst_o=dst;
+         struct gl_program_parameter_list *paramList;
+       
+       if (mesa_vp->IsNVProgram) {
 -      GLint tokens[6] = { STATE_MATRIX, STATE_MVP, 0, 0, 0, STATE_MATRIX };
++              _mesa_load_tracked_matrices(ctx);
+               
+               for (pi=0; pi < MAX_NV_VERTEX_PROGRAM_PARAMS; pi++) {
+                       *dst++=ctx->VertexProgram.Parameters[pi][0];
+                       *dst++=ctx->VertexProgram.Parameters[pi][1];
+                       *dst++=ctx->VertexProgram.Parameters[pi][2];
+                       *dst++=ctx->VertexProgram.Parameters[pi][3];
+               }
+               return dst - dst_o;
+       }
+       
+       assert(mesa_vp->Base.Parameters);
+       _mesa_load_state_parameters(ctx, mesa_vp->Base.Parameters);
+       
+       if(mesa_vp->Base.Parameters->NumParameters * 4 > VSF_MAX_FRAGMENT_LENGTH){
+               fprintf(stderr, "%s:Params exhausted\n", __FUNCTION__);
+               exit(-1);
+       }
+       
+         paramList = mesa_vp->Base.Parameters;
+       for(pi=0; pi < paramList->NumParameters; pi++){
+               switch(paramList->Parameters[pi].Type){
+                       
+               case PROGRAM_STATE_VAR:
+               case PROGRAM_NAMED_PARAM: /* both fall through to the copy below */
+                       //fprintf(stderr, "%s", vp->Parameters->Parameters[pi].Name);
+               case PROGRAM_CONSTANT:
+                       *dst++=paramList->ParameterValues[pi][0];
+                       *dst++=paramList->ParameterValues[pi][1];
+                       *dst++=paramList->ParameterValues[pi][2];
+                       *dst++=paramList->ParameterValues[pi][3];
+               break;
+               
+               default: _mesa_problem(NULL, "Bad param type in %s", __FUNCTION__); /* nothing is written to dst for this entry */
+               }
+       
+       }
+       
+       return dst - dst_o;
+ }
+               
+ static unsigned long t_dst_mask(GLuint mask)
+ {
+       /* Translate a Mesa write mask into the hardware write mask.
+        * WRITEMASK_* is equivalent to VSF_FLAG_* (the #if/#error check near
+        * the top of this file enforces that), so the mask is passed through,
+        * restricted to the bits in VSF_FLAG_ALL. */
+       return mask & VSF_FLAG_ALL;
+ }
+ static unsigned long t_dst_class(enum register_file file)
+ {     /* Map the register file of a destination operand to the matching
+        * VSF_OUT_CLASS_* value.  Only temporaries, outputs and the address
+        * register are accepted; any other file prints a message and
+        * terminates the process.
+        * NOTE(review): that error path calls exit(0), i.e. reports success,
+        * while other fatal paths in this file use exit(-1) -- confirm. */
+       
+       switch(file){
+               case PROGRAM_TEMPORARY:
+                       return VSF_OUT_CLASS_TMP;
+               case PROGRAM_OUTPUT:
+                       return VSF_OUT_CLASS_RESULT;
+               case PROGRAM_ADDRESS:
+                       return VSF_OUT_CLASS_ADDR;
+               /*      
+               case PROGRAM_INPUT:
+               case PROGRAM_LOCAL_PARAM:
+               case PROGRAM_ENV_PARAM:
+               case PROGRAM_NAMED_PARAM:
+               case PROGRAM_STATE_VAR:
+               case PROGRAM_WRITE_ONLY:
+               case PROGRAM_ADDRESS:
+               */
+               default:
+                       fprintf(stderr, "problem in %s", __FUNCTION__);
+                       exit(0);
+       }
+ }
+ static unsigned long t_dst_index(struct r300_vertex_program *vp, struct prog_dst_register *dst)
+ {     /* Return the register index to encode for a destination operand.
+        * Outputs are remapped through vp->outputs[]; every other register
+        * file keeps dst->Index unchanged. */
+       if(dst->File == PROGRAM_OUTPUT)
+               return vp->outputs[dst->Index];
+       return dst->Index;
+ }
+ static unsigned long t_src_class(enum register_file file)
+ {     /* Map the register file of a source operand to the matching
+        * VSF_IN_CLASS_* value.  Local, environment, named and state-variable
+        * parameters all share VSF_IN_CLASS_PARAM.  Any other file prints a
+        * message and terminates the process.
+        * NOTE(review): that error path calls exit(0), i.e. reports success,
+        * while other fatal paths in this file use exit(-1) -- confirm. */
+       
+       switch(file){
+               case PROGRAM_TEMPORARY:
+                       return VSF_IN_CLASS_TMP;
+                       
+               case PROGRAM_INPUT:
+                       return VSF_IN_CLASS_ATTR;
+                       
+               case PROGRAM_LOCAL_PARAM:
+               case PROGRAM_ENV_PARAM:
+               case PROGRAM_NAMED_PARAM:
+               case PROGRAM_STATE_VAR:
+                       return VSF_IN_CLASS_PARAM;
+               /*      
+               case PROGRAM_OUTPUT:
+               case PROGRAM_WRITE_ONLY:
+               case PROGRAM_ADDRESS:
+               */
+               default:
+                       fprintf(stderr, "problem in %s", __FUNCTION__);
+                       exit(0);
+       }
+ }
+ static __inline unsigned long t_swizzle(GLubyte swizzle)
+ {
+ /* Translate one Mesa swizzle selector into the hardware component selector.
+  * This is in fact a NOP, as the Mesa SWIZZLE_* values are all identical to
+  * VSF_IN_COMPONENT_* (checked by the #if/#error block near the top of this
+  * file), so the value is returned unchanged. */
+       return swizzle;
+ }
+ #if 0
+ static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller)
+ {     /* Debug helper, compiled out by the surrounding #if 0: prints all
+        * VERT_ATTRIB_MAX entries of vp->inputs[] to stderr, tagged with the
+        * caller's name.  A NULL vp is reported and otherwise ignored. */
+       int i;
+       
+       if(vp == NULL){
+               fprintf(stderr, "vp null in call to %s from %s\n", __FUNCTION__, caller);
+               return ;
+       }
+       
+       fprintf(stderr, "%s:<", caller);
+       for(i=0; i < VERT_ATTRIB_MAX; i++)
+               fprintf(stderr, "%d ", vp->inputs[i]);
+       fprintf(stderr, ">\n");
+       
+ }
+ #endif
+ static unsigned long t_src_index(struct r300_vertex_program *vp, struct prog_src_register *src)
+ {     /* Return the register index to encode for a source operand.
+        *
+        * Vertex inputs are numbered on first use: vp->inputs[] records the
+        * slot given to each attribute (-1 means none yet), and a new
+        * attribute receives the highest slot assigned so far plus one.
+        * Every other register file uses src->Index directly; a negative
+        * index prints a warning and is replaced by 0. */
+       int i;
+       int max_reg=-1;
+       
+       if(src->File == PROGRAM_INPUT){
+               if(vp->inputs[src->Index] != -1) /* slot already assigned */
+                       return vp->inputs[src->Index];
+               
+               for(i=0; i < VERT_ATTRIB_MAX; i++) /* find the highest slot handed out so far */
+                       if(vp->inputs[i] > max_reg)
+                               max_reg=vp->inputs[i];
+               
+               vp->inputs[src->Index]=max_reg+1;
+               
+               //vp_dump_inputs(vp, __FUNCTION__);     
+               
+               return vp->inputs[src->Index];
+       }else{
+               if (src->Index < 0) {
+                       fprintf(stderr, "WARNING negative offsets for indirect addressing do not work\n");
+                       return 0;
+               }
+               return src->Index;
+       }
+ }
+ static unsigned long t_src(struct r300_vertex_program *vp, struct prog_src_register *src)
+ {
+       /* Build the hardware source operand word for a vector source: register
+        * index, the four swizzle selectors, register class and negate mask,
+        * with src->RelAddr OR'ed in at bit 4.
+        *
+        * src->NegateBase uses the NEGATE_ flags from shader/prog_instruction.h,
+        * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
+        */
+       return MAKE_VSF_SOURCE(t_src_index(vp, src),
+                               t_swizzle(GET_SWZ(src->Swizzle, 0)),
+                               t_swizzle(GET_SWZ(src->Swizzle, 1)),
+                               t_swizzle(GET_SWZ(src->Swizzle, 2)),
+                               t_swizzle(GET_SWZ(src->Swizzle, 3)),
+                               t_src_class(src->File),
+                               src->NegateBase) | (src->RelAddr << 4);
+ }
+ static unsigned long t_src_scalar(struct r300_vertex_program *vp, struct prog_src_register *src)
+ {     /* Build the hardware source operand word for a scalar source: the
+        * first swizzle selector of src is replicated into all four component
+        * slots, and a non-zero NegateBase negates every component
+        * (VSF_FLAG_ALL) instead of being passed through as a mask.
+        * src->RelAddr is OR'ed in at bit 4, as in t_src(). */
+                       
+       return MAKE_VSF_SOURCE(t_src_index(vp, src),
+                               t_swizzle(GET_SWZ(src->Swizzle, 0)),
+                               t_swizzle(GET_SWZ(src->Swizzle, 0)),
+                               t_swizzle(GET_SWZ(src->Swizzle, 0)),
+                               t_swizzle(GET_SWZ(src->Swizzle, 0)),
+                               t_src_class(src->File),
+                               src->NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src->RelAddr << 4);
+ }
+ static unsigned long t_opcode(enum prog_opcode opcode)
+ {     /* Map a Mesa opcode to its R300_VPI_OUT_OP_* hardware opcode.
+        * Only the opcodes listed below are accepted (DP4 maps to the
+        * hardware DOT op); any other opcode prints a message and the
+        * process exits with status -1.  The trailing return is never
+        * reached. */
+       switch(opcode){
+               case OPCODE_ARL: return R300_VPI_OUT_OP_ARL;
+               case OPCODE_DST: return R300_VPI_OUT_OP_DST;
+               case OPCODE_EX2: return R300_VPI_OUT_OP_EX2;
+               case OPCODE_EXP: return R300_VPI_OUT_OP_EXP;
+               case OPCODE_FRC: return R300_VPI_OUT_OP_FRC;
+               case OPCODE_LG2: return R300_VPI_OUT_OP_LG2;
+               case OPCODE_LOG: return R300_VPI_OUT_OP_LOG;
+               case OPCODE_MAX: return R300_VPI_OUT_OP_MAX;
+               case OPCODE_MIN: return R300_VPI_OUT_OP_MIN;
+               case OPCODE_MUL: return R300_VPI_OUT_OP_MUL;
+               case OPCODE_RCP: return R300_VPI_OUT_OP_RCP;
+               case OPCODE_RSQ: return R300_VPI_OUT_OP_RSQ;
+               case OPCODE_SGE: return R300_VPI_OUT_OP_SGE;
+               case OPCODE_SLT: return R300_VPI_OUT_OP_SLT;
+               case OPCODE_DP4: return R300_VPI_OUT_OP_DOT;
+               
+               default: 
+                       fprintf(stderr, "%s: Should not be called with opcode %d!", __FUNCTION__, opcode);
+       }
+       exit(-1);
+       return 0;
+ }
+ static unsigned long op_operands(enum prog_opcode opcode)
+ {     /* Look up an opcode in op_names[] and return its ip field: the
+        * operand count in the OP_MASK bits, plus SCALAR_FLAG where set.
+        * An opcode missing from the table prints a message and the process
+        * exits with status -1. */
+       int i;
+       
+       /* Linear search instead of indexing by opcode:
+          can we trust Mesa's opcodes to be in order? */
+       for(i=0; i < sizeof(op_names) / sizeof(*op_names); i++)
+               if(op_names[i].opcode == opcode)
+                       return op_names[i].ip;
+       
+       fprintf(stderr, "op %d not found in op_names\n", opcode);
+       exit(-1);
+       return 0;
+ }
+ static GLboolean valid_dst(struct r300_vertex_program *vp, struct prog_dst_register *dst)
+ {     /* Tell whether a destination register can be written as-is.
+        * Returns GL_FALSE (after a one-time warning) when dst is an output
+        * that has no slot in vp->outputs[] (entry is -1), GL_TRUE otherwise.
+        * For the address register, index 0 is asserted. */
+       if(dst->File == PROGRAM_OUTPUT && vp->outputs[dst->Index] == -1){
+               WARN_ONCE("Output %d not used by fragment program\n", dst->Index);
+               return GL_FALSE;
+       }else if(dst->File == PROGRAM_ADDRESS) {
+               assert(dst->Index == 0);
+       }
+       
+       return GL_TRUE;
+ }
+ /* TODO: Get rid of t_src_class call */
+ #define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \
+                      ((t_src_class(a.File) == VSF_IN_CLASS_PARAM && \
+                        t_src_class(b.File) == VSF_IN_CLASS_PARAM) || \
+                       (t_src_class(a.File) == VSF_IN_CLASS_ATTR && \
+                        t_src_class(b.File) == VSF_IN_CLASS_ATTR)))) \
+                        
+ #define ZERO_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \
+                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
+                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
+                                   t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4))
+                                  
+ #define ZERO_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \
+                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
+                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
+                                   t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4))
+ #define ZERO_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \
+                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
+                                   SWIZZLE_ZERO, SWIZZLE_ZERO, \
+                                   t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4))
+                                  
+ #define ONE_SRC_0 (MAKE_VSF_SOURCE(t_src_index(vp, &src[0]), \
+                                   SWIZZLE_ONE, SWIZZLE_ONE, \
+                                   SWIZZLE_ONE, SWIZZLE_ONE, \
+                                   t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4))
+                                  
+ #define ONE_SRC_1 (MAKE_VSF_SOURCE(t_src_index(vp, &src[1]), \
+                                   SWIZZLE_ONE, SWIZZLE_ONE, \
+                                   SWIZZLE_ONE, SWIZZLE_ONE, \
+                                   t_src_class(src[1].File), VSF_FLAG_NONE) | (src[1].RelAddr << 4))
+ #define ONE_SRC_2 (MAKE_VSF_SOURCE(t_src_index(vp, &src[2]), \
+                                   SWIZZLE_ONE, SWIZZLE_ONE, \
+                                   SWIZZLE_ONE, SWIZZLE_ONE, \
+                                   t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4))
+                                  
+ /* DP4 version seems to trigger some hw peculiarity */
+ //#define PREFER_DP4
+ #define FREE_TEMPS() \
+       do { \
+               if(u_temp_i < vp->num_temporaries) { \
+                       WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_i); \
+                       vp->native = GL_FALSE; \
+               } \
+               u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \
+       } while (0)
+ static void r300_translate_vertex_shader(struct r300_vertex_program *vp, struct prog_instruction *vpi)
+ {
+       int i, cur_reg=0;
+       VERTEX_SHADER_INSTRUCTION *o_inst;
+       unsigned long operands;
+       int are_srcs_scalar;
+       unsigned long hw_op;
+       /* Initial value should be last tmp reg that hw supports.
+          Strangely enough r300 doesn't mind even though these would be out of range.
+          Smart enough to realize that it doesn't need it? */
+       int u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1;
+       struct prog_src_register src[3];
+       vp->pos_end=0; /* Not supported yet */
+       vp->program.length=0;
+       /*vp->num_temporaries=mesa_vp->Base.NumTemporaries;*/
+       
+       for(i=0; i < VERT_ATTRIB_MAX; i++)
+               vp->inputs[i] = -1;
+       for(i=0; i < VERT_RESULT_MAX; i++)
+               vp->outputs[i] = -1;
+       
+       assert(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS));
+       
+       /* Assign outputs */
+       if(vp->key.OutputsWritten & (1 << VERT_RESULT_HPOS))
+               vp->outputs[VERT_RESULT_HPOS] = cur_reg++;
+       
+       if(vp->key.OutputsWritten & (1 << VERT_RESULT_PSIZ))
+               vp->outputs[VERT_RESULT_PSIZ] = cur_reg++;
+       
+       if(vp->key.OutputsWritten & (1 << VERT_RESULT_COL0))
+               vp->outputs[VERT_RESULT_COL0] = cur_reg++;
+       
+       if(vp->key.OutputsWritten & (1 << VERT_RESULT_COL1))
+               vp->outputs[VERT_RESULT_COL1] = cur_reg++;
+       
+ #if 0 /* Not supported yet */
+       if(vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0))
+               vp->outputs[VERT_RESULT_BFC0] = cur_reg++;
+       
+       if(vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1))
+               vp->outputs[VERT_RESULT_BFC1] = cur_reg++;
+       
+       if(vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC))
+               vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
+ #endif
+       
+       for(i=VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++)
+               if(vp->key.OutputsWritten & (1 << i))
+                       vp->outputs[i] = cur_reg++;
+       
+       vp->translated = GL_TRUE;
+       vp->native = GL_TRUE;
+       
+       o_inst=vp->program.body.i;
+       for(; vpi->Opcode != OPCODE_END; vpi++, o_inst++){
+               FREE_TEMPS();
+               if(!valid_dst(vp, &vpi->DstReg))
+               {
+                       /* redirect result to unused temp */
+                       vpi->DstReg.File = PROGRAM_TEMPORARY;
+                       vpi->DstReg.Index = u_temp_i;
+               }
+               
+               operands=op_operands(vpi->Opcode);
+               are_srcs_scalar=operands & SCALAR_FLAG;
+               operands &= OP_MASK;
+               
+               for(i=0; i < operands; i++)
+                       src[i]=vpi->SrcReg[i];
+               
+               if(operands == 3){ /* TODO: scalars */
+                       if( CMP_SRCS(src[1], src[2]) || CMP_SRCS(src[0], src[2]) ){
+                               o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
+                                               VSF_FLAG_ALL, VSF_OUT_CLASS_TMP);
+                               
+                               o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[2]),
+                                               SWIZZLE_X, SWIZZLE_Y,
+                                               SWIZZLE_Z, SWIZZLE_W,
+                                               t_src_class(src[2].File), VSF_FLAG_NONE) | (src[2].RelAddr << 4);
+                               o_inst->src2=ZERO_SRC_2;
+                               o_inst->src3=ZERO_SRC_2;
+                               o_inst++;
+                                               
+                               src[2].File=PROGRAM_TEMPORARY;
+                               src[2].Index=u_temp_i;
+                               src[2].RelAddr=0;
+                               u_temp_i--;
+                       }
+                       
+               }
+               
+               if(operands >= 2){
+                       if( CMP_SRCS(src[1], src[0]) ){
+                               o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, u_temp_i,
+                                               VSF_FLAG_ALL, VSF_OUT_CLASS_TMP);
+                               
+                               o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+                                               SWIZZLE_X, SWIZZLE_Y,
+                                               SWIZZLE_Z, SWIZZLE_W,
+                                               t_src_class(src[0].File), VSF_FLAG_NONE) | (src[0].RelAddr << 4);
+                               o_inst->src2=ZERO_SRC_0;
+                               o_inst->src3=ZERO_SRC_0;
+                               o_inst++;
+                                               
+                               src[0].File=PROGRAM_TEMPORARY;
+                               src[0].Index=u_temp_i;
+                               src[0].RelAddr=0;
+                               u_temp_i--;
+                       }
+               }
+               
+               /* These ops need special handling. */
+               switch(vpi->Opcode){
+               case OPCODE_POW:
+                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_POW, t_dst_index(vp, &vpi->DstReg),
+                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
+                       o_inst->src1=t_src_scalar(vp, &src[0]);
+                       o_inst->src2=ZERO_SRC_0;
+                       o_inst->src3=t_src_scalar(vp, &src[1]);
+                       goto next;
+                       
+               case OPCODE_MOV://ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{} {ZERO ZERO ZERO ZERO} 
+               case OPCODE_SWZ:
+ #if 1
+                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
+                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
+                       o_inst->src1=t_src(vp, &src[0]);
+                       o_inst->src2=ZERO_SRC_0;
+                       o_inst->src3=ZERO_SRC_0;
+ #else
+                       hw_op=(src[0].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
+                       
+                       o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
+                               t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
+                       o_inst->src1=t_src(vp, &src[0]);
+                       o_inst->src2=ONE_SRC_0;
+                       o_inst->src3=ZERO_SRC_0;
+ #endif                        
+                       goto next;
+                       
+               case OPCODE_ADD:
+ #if 1
+                       hw_op=(src[0].File == PROGRAM_TEMPORARY &&
+                               src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
+                       
+                       o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
+                               t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
+                       o_inst->src1=ONE_SRC_0;
+                       o_inst->src2=t_src(vp, &src[0]);
+                       o_inst->src3=t_src(vp, &src[1]);
+ #else
+                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
+                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
+                       o_inst->src1=t_src(vp, &src[0]);
+                       o_inst->src2=t_src(vp, &src[1]);
+                       o_inst->src3=ZERO_SRC_1;
+                       
+ #endif
+                       goto next;
+                       
+               case OPCODE_MAD:
+                       hw_op=(src[0].File == PROGRAM_TEMPORARY &&
+                               src[1].File == PROGRAM_TEMPORARY &&
+                               src[2].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
+                       
+                       o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
+                               t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
+                       o_inst->src1=t_src(vp, &src[0]);
+                       o_inst->src2=t_src(vp, &src[1]);
+                       o_inst->src3=t_src(vp, &src[2]);
+                       goto next;
+                       
+               case OPCODE_MUL: /* HW mul can take third arg but appears to have some other limitations. */
+                       hw_op=(src[0].File == PROGRAM_TEMPORARY &&
+                               src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
+                       
+                       o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
+                               t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
+                       o_inst->src1=t_src(vp, &src[0]);
+                       o_inst->src2=t_src(vp, &src[1]);
+                       o_inst->src3=ZERO_SRC_1;
+                       goto next;
+                       
+               case OPCODE_DP3://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ZERO} PARAM 0{} {X Y Z ZERO} 
+                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg),
+                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
+                       
+                       o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
+                                       SWIZZLE_ZERO,
+                                       t_src_class(src[0].File),
+                                       src[0].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
+                       
+                       o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
+                                       t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
+                                       t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
+                                       t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
+                                       SWIZZLE_ZERO,
+                                       t_src_class(src[1].File),
+                                       src[1].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
+                       o_inst->src3=ZERO_SRC_1;
+                       goto next;
+               case OPCODE_SUB://ADD RESULT 1.X Y Z W TMP 0{} {X Y Z W} PARAM 1{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
+ #if 1
+                       hw_op=(src[0].File == PROGRAM_TEMPORARY &&
+                               src[1].File == PROGRAM_TEMPORARY) ? R300_VPI_OUT_OP_MAD_2 : R300_VPI_OUT_OP_MAD;
+                       
+                       o_inst->op=MAKE_VSF_OP(hw_op, t_dst_index(vp, &vpi->DstReg),
+                               t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
+                       o_inst->src1=t_src(vp, &src[0]);
+                       o_inst->src2=ONE_SRC_0;
+                       o_inst->src3=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
+                                       t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
+                                       t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
+                                       t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
+                                       t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
+                                       t_src_class(src[1].File),
+                                       (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE)  | (src[1].RelAddr << 4);
+ #else
+                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
+                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
+                       
+                       o_inst->src1=t_src(vp, &src[0]);
+                       o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
+                                       t_swizzle(GET_SWZ(src[1].Swizzle, 0)),
+                                       t_swizzle(GET_SWZ(src[1].Swizzle, 1)),
+                                       t_swizzle(GET_SWZ(src[1].Swizzle, 2)),
+                                       t_swizzle(GET_SWZ(src[1].Swizzle, 3)),
+                                       t_src_class(src[1].File),
+                                       (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
+                       o_inst->src3=0;
+ #endif
+                       goto next;
+                       
+               case OPCODE_ABS://MAX RESULT 1.X Y Z W PARAM 0{} {X Y Z W} PARAM 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W
+                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAX, t_dst_index(vp, &vpi->DstReg),
+                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
+                       
+                       o_inst->src1=t_src(vp, &src[0]);
+                       o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 3)),
+                                       t_src_class(src[0].File),
+                                       (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
+                       o_inst->src3=0;
+                       goto next;
+                       
+               case OPCODE_FLR:
+               /* FRC TMP 0.X Y Z W PARAM 0{} {X Y Z W} 
+                  ADD RESULT 1.X Y Z W PARAM 0{} {X Y Z W} TMP 0{X Y Z W } {X Y Z W} neg Xneg Yneg Zneg W */
+                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_FRC, u_temp_i,
+                                       t_dst_mask(vpi->DstReg.WriteMask), VSF_OUT_CLASS_TMP);
+                       
+                       o_inst->src1=t_src(vp, &src[0]);
+                       o_inst->src2=ZERO_SRC_0;
+                       o_inst->src3=ZERO_SRC_0;
+                       o_inst++;
+                       
+                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_ADD, t_dst_index(vp, &vpi->DstReg),
+                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
+                       
+                       o_inst->src1=t_src(vp, &src[0]);
+                       o_inst->src2=MAKE_VSF_SOURCE(u_temp_i,
+                                       VSF_IN_COMPONENT_X,
+                                       VSF_IN_COMPONENT_Y,
+                                       VSF_IN_COMPONENT_Z,
+                                       VSF_IN_COMPONENT_W,
+                                       VSF_IN_CLASS_TMP,
+                                       /* Not 100% sure about this */
+                                       (!src[0].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE/*VSF_FLAG_ALL*/);
+                       o_inst->src3=ZERO_SRC_0;
+                       u_temp_i--;
+                       goto next;
+                       
+               case OPCODE_LG2:// LG2 RESULT 1.X Y Z W PARAM 0{} {X X X X}
+                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_LG2, t_dst_index(vp, &vpi->DstReg),
+                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
+                       
+                       o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                       t_src_class(src[0].File),
+                                       src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
+                       o_inst->src2=ZERO_SRC_0;
+                       o_inst->src3=ZERO_SRC_0;
+                       goto next;
+                       
+               case OPCODE_LIT://LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W} 
+                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_LIT, t_dst_index(vp, &vpi->DstReg),
+                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
+                       /* NOTE: Users swizzling might not work. */
+                       o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
+                                       VSF_IN_COMPONENT_ZERO, // z
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
+                                       t_src_class(src[0].File),
+                                       src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
+                       o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
+                                       VSF_IN_COMPONENT_ZERO, // z
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
+                                       t_src_class(src[0].File),
+                                       src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
+                       o_inst->src3=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
+                                       VSF_IN_COMPONENT_ZERO, // z
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
+                                       t_src_class(src[0].File),
+                                       src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
+                       goto next;
+                       
+               case OPCODE_DPH://DOT RESULT 1.X Y Z W PARAM 0{} {X Y Z ONE} PARAM 0{} {X Y Z W} 
+                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_DOT, t_dst_index(vp, &vpi->DstReg),
+                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
+                       
+                       o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)),
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 1)),
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 2)),
+                                       VSF_IN_COMPONENT_ONE,
+                                       t_src_class(src[0].File),
+                                       src[0].NegateBase ? VSF_FLAG_XYZ : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
+                       o_inst->src2=t_src(vp, &src[1]);
+                       o_inst->src3=ZERO_SRC_1;
+                       goto next;
+                       
+               case OPCODE_XPD:
+                       /* mul r0, r1.yzxw, r2.zxyw
+                          mad r0, -r2.yzxw, r1.zxyw, r0
+                          NOTE: might need MAD_2
+                        */
+                       
+                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, u_temp_i,
+                                       t_dst_mask(vpi->DstReg.WriteMask), VSF_OUT_CLASS_TMP);
+                       
+                       o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
+                                       t_src_class(src[0].File),
+                                       src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
+                       
+                       o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
+                                       t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
+                                       t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
+                                       t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
+                                       t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
+                                       t_src_class(src[1].File),
+                                       src[1].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
+                       
+                       o_inst->src3=ZERO_SRC_1;
+                       o_inst++;
+                       u_temp_i--;
+                       
+                       o_inst->op=MAKE_VSF_OP(R300_VPI_OUT_OP_MAD, t_dst_index(vp, &vpi->DstReg),
+                                       t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
+                       
+                       o_inst->src1=MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),
+                                       t_swizzle(GET_SWZ(src[1].Swizzle, 1)), // y
+                                       t_swizzle(GET_SWZ(src[1].Swizzle, 2)), // z
+                                       t_swizzle(GET_SWZ(src[1].Swizzle, 0)), // x
+                                       t_swizzle(GET_SWZ(src[1].Swizzle, 3)), // w
+                                       t_src_class(src[1].File),
+                                       (!src[1].NegateBase) ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[1].RelAddr << 4);
+                       
+                       o_inst->src2=MAKE_VSF_SOURCE(t_src_index(vp, &src[0]),
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 2)), // z
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 0)), // x
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 1)), // y
+                                       t_swizzle(GET_SWZ(src[0].Swizzle, 3)), // w
+                                       t_src_class(src[0].File),
+                                       src[0].NegateBase ? VSF_FLAG_ALL : VSF_FLAG_NONE) | (src[0].RelAddr << 4);
+                       
+                       o_inst->src3=MAKE_VSF_SOURCE(u_temp_i+1,
+                                       VSF_IN_COMPONENT_X,
+                                       VSF_IN_COMPONENT_Y,
+                                       VSF_IN_COMPONENT_Z,
+                                       VSF_IN_COMPONENT_W,
+                                       VSF_IN_CLASS_TMP,
+                                       VSF_FLAG_NONE);
+               
+                       goto next;
+               case OPCODE_RCC:
+                       fprintf(stderr, "Dont know how to handle op %d yet\n", vpi->Opcode);
+                       exit(-1);
+               break;
+               case OPCODE_END:
+                       break;
+               default:
+                       break;
+               }
+       
+               o_inst->op=MAKE_VSF_OP(t_opcode(vpi->Opcode), t_dst_index(vp, &vpi->DstReg),
+                               t_dst_mask(vpi->DstReg.WriteMask), t_dst_class(vpi->DstReg.File));
+               
+               if(are_srcs_scalar){
+                       switch(operands){
+                               case 1:
+                                       o_inst->src1=t_src_scalar(vp, &src[0]);
+                                       o_inst->src2=ZERO_SRC_0;
+                                       o_inst->src3=ZERO_SRC_0;
+                               break;
+                               
+                               case 2:
+                                       o_inst->src1=t_src_scalar(vp, &src[0]);
+                                       o_inst->src2=t_src_scalar(vp, &src[1]);
+                                       o_inst->src3=ZERO_SRC_1;
+                               break;
+                               
+                               case 3:
+                                       o_inst->src1=t_src_scalar(vp, &src[0]);
+                                       o_inst->src2=t_src_scalar(vp, &src[1]);
+                                       o_inst->src3=t_src_scalar(vp, &src[2]);
+                               break;
+                               
+                               default:
+                                       fprintf(stderr, "scalars and op RCC not handled yet");
+                                       exit(-1);
+                               break;
+                       }
+               }else{
+                       switch(operands){
+                               case 1:
+                                       o_inst->src1=t_src(vp, &src[0]);
+                                       o_inst->src2=ZERO_SRC_0;
+                                       o_inst->src3=ZERO_SRC_0;
+                               break;
+                       
+                               case 2:
+                                       o_inst->src1=t_src(vp, &src[0]);
+                                       o_inst->src2=t_src(vp, &src[1]);
+                                       o_inst->src3=ZERO_SRC_1;
+                               break;
+                       
+                               case 3:
+                                       o_inst->src1=t_src(vp, &src[0]);
+                                       o_inst->src2=t_src(vp, &src[1]);
+                                       o_inst->src3=t_src(vp, &src[2]);
+                               break;
+                       
+                               default:
+                                       fprintf(stderr, "scalars and op RCC not handled yet");
+                                       exit(-1);
+                               break;
+                       }
+               }
+               next: ;
+       }
+       
+       /* Will most likely segfault before we get here... fix later. */
+       if(o_inst - vp->program.body.i >= VSF_MAX_FRAGMENT_LENGTH/4) {
+               vp->program.length = 0;
+               vp->native = GL_FALSE;
+               return ;
+       }
+       vp->program.length=(o_inst - vp->program.body.i) * 4;
+ #if 0
+       fprintf(stderr, "hw program:\n");
+       for(i=0; i < vp->program.length; i++)
+               fprintf(stderr, "%08x\n", vp->program.body.d[i]);
+ #endif
+ }
+ /**
+  * Prepend the position transform to a position-invariant vertex program.
+  *
+  * A new instruction array with room for four extra instructions is
+  * allocated, the four instructions that multiply the incoming vertex
+  * position by the MVP matrix are written into slots 0..3, the original
+  * instructions are copied in behind them and the old array is freed.
+  *
+  * \param prog  program to rewrite in place; its last instruction is
+  *              expected to be OPCODE_END (asserted on exit).
+  */
+ static void position_invariant(struct gl_program *prog)
+ {
+       struct prog_instruction *vpi;
+       struct gl_program_parameter_list *paramList;
+       int i;
+       /* Matrix state key layout (per Mesa's prog_statevars.h):
+        *   [0] matrix name, [1] matrix index, [2] first row, [3] last row,
+        *   [4] modifier (0 = none, or STATE_MATRIX_TRANSPOSE etc.).
+        * The five-element initializer below fills every slot, assuming
+        * STATE_LENGTH is 5, so index 4 is the last one that may be
+        * written.
+        */
++      gl_state_index tokens[STATE_LENGTH] = { STATE_MVP_MATRIX, 0, 0, 0, 0 };
+ #ifdef PREFER_DP4
+       tokens[4] = 0;  /* no modifier: use the matrix rows as they are */
+ #else
+       tokens[4] = STATE_MATRIX_TRANSPOSE;
+ #endif
+       paramList = prog->Parameters;
+       vpi = _mesa_alloc_instructions (prog->NumInstructions + 4);
+       _mesa_init_instructions (vpi, prog->NumInstructions + 4);
+       for (i=0; i < 4; i++) {
+               GLint idx;
+               /* reference exactly one row of the matrix: row i .. row i */
+               tokens[2] = tokens[3] = i;
+               idx = _mesa_add_state_reference(paramList, tokens);
+ #ifdef PREFER_DP4
+               /* result.position[i] = dot4(row i, vertex.position) */
+               vpi[i].Opcode = OPCODE_DP4;
+               vpi[i].StringPos = 0;
+               vpi[i].Data = 0;
+               vpi[i].DstReg.File = PROGRAM_OUTPUT;
+               vpi[i].DstReg.Index = VERT_RESULT_HPOS;
+               vpi[i].DstReg.WriteMask = 1 << i;
+               vpi[i].DstReg.CondMask = COND_TR;
+               vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
+               vpi[i].SrcReg[0].Index = idx;
+               vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
+               vpi[i].SrcReg[1].File = PROGRAM_INPUT;
+               vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
+               vpi[i].SrcReg[1].Swizzle = SWIZZLE_XYZW;
+ #else
+               /* MUL for the first term, then MAD to accumulate the rest:
+                * acc = state_row[i] * position.iiii (+ acc)
+                */
+               if (i == 0)
+                       vpi[i].Opcode = OPCODE_MUL;
+               else
+                       vpi[i].Opcode = OPCODE_MAD;
+               vpi[i].StringPos = 0;
+               vpi[i].Data = 0;
+               /* only the last instruction writes the program output;
+                * the earlier ones accumulate in temporary 0
+                */
+               if (i == 3)
+                       vpi[i].DstReg.File = PROGRAM_OUTPUT;
+               else
+                       vpi[i].DstReg.File = PROGRAM_TEMPORARY;
+               vpi[i].DstReg.Index = 0;
+               vpi[i].DstReg.WriteMask = 0xf;
+               vpi[i].DstReg.CondMask = COND_TR;
+               vpi[i].SrcReg[0].File = PROGRAM_STATE_VAR;
+               vpi[i].SrcReg[0].Index = idx;
+               vpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
+               vpi[i].SrcReg[1].File = PROGRAM_INPUT;
+               vpi[i].SrcReg[1].Index = VERT_ATTRIB_POS;
+               vpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(i, i, i, i);
+               if (i > 0) {
+                       vpi[i].SrcReg[2].File = PROGRAM_TEMPORARY;
+                       vpi[i].SrcReg[2].Index = 0;
+                       vpi[i].SrcReg[2].Swizzle = SWIZZLE_XYZW;
+               }
+ #endif
+       }
+       /* i == 4 here: the original program follows the four new instructions */
+       _mesa_copy_instructions (&vpi[i], prog->Instructions, prog->NumInstructions);
+       free(prog->Instructions);
+       prog->Instructions = vpi;
+       prog->NumInstructions += 4;
+       vpi = &prog->Instructions[prog->NumInstructions-1];
+       assert(vpi->Opcode == OPCODE_END);
+ }
+ /**
+  * Insert two MOV instructions immediately before the program's END.
+  *
+  * Both MOVs read the temporary \p temp_index: the first writes it to
+  * VERT_RESULT_HPOS, the second to the texcoord output selected by
+  * vp->wpos_idx.  The instruction array is reallocated two entries larger
+  * and the old array is freed.
+  *
+  * \param vp          supplies wpos_idx, the texcoord slot to write
+  * \param prog        program to rewrite in place (must end with OPCODE_END)
+  * \param temp_index  temporary register holding the position
+  */
+ static void insert_wpos(struct r300_vertex_program *vp,
+                      struct gl_program *prog,
+                      GLuint temp_index)
+ {
+       struct prog_instruction *new_insts;
+       struct prog_instruction *mov_hpos;
+       struct prog_instruction *mov_wpos;
+
+       new_insts = _mesa_alloc_instructions (prog->NumInstructions + 2);
+       _mesa_init_instructions (new_insts, prog->NumInstructions + 2);
+
+       /* everything except the trailing END keeps its position */
+       _mesa_copy_instructions (new_insts, prog->Instructions, prog->NumInstructions - 1);
+       /* END moves two slots further down */
+       _mesa_copy_instructions (&new_insts[prog->NumInstructions + 1],
+                                &prog->Instructions[prog->NumInstructions - 1],
+                                1);
+
+       /* the two freed-up slots sit where END used to be */
+       mov_hpos = &new_insts[prog->NumInstructions - 1];
+       mov_wpos = mov_hpos + 1;
+
+       mov_hpos->Opcode = OPCODE_MOV;
+       mov_hpos->DstReg.File = PROGRAM_OUTPUT;
+       mov_hpos->DstReg.Index = VERT_RESULT_HPOS;
+       mov_hpos->DstReg.WriteMask = WRITEMASK_XYZW;
+       mov_hpos->DstReg.CondMask = COND_TR;
+       mov_hpos->SrcReg[0].File = PROGRAM_TEMPORARY;
+       mov_hpos->SrcReg[0].Index = temp_index;
+       mov_hpos->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+       mov_wpos->Opcode = OPCODE_MOV;
+       mov_wpos->DstReg.File = PROGRAM_OUTPUT;
+       mov_wpos->DstReg.Index = VERT_RESULT_TEX0+vp->wpos_idx;
+       mov_wpos->DstReg.WriteMask = WRITEMASK_XYZW;
+       mov_wpos->DstReg.CondMask = COND_TR;
+       mov_wpos->SrcReg[0].File = PROGRAM_TEMPORARY;
+       mov_wpos->SrcReg[0].Index = temp_index;
+       mov_wpos->SrcReg[0].Swizzle = SWIZZLE_XYZW;
+
+       free(prog->Instructions);
+       prog->Instructions = new_insts;
+       prog->NumInstructions += 2;
+       assert(prog->Instructions[prog->NumInstructions-1].Opcode == OPCODE_END);
+ }
+ /**
+  * Redirect every write to VERT_RESULT_HPOS into a newly reserved
+  * temporary, then let insert_wpos() copy that temporary to the outputs.
+  *
+  * \param vp    vertex program being built (passed through to insert_wpos)
+  * \param prog  program to rewrite in place
+  */
+ static void pos_as_texcoord(struct r300_vertex_program *vp,
+                           struct gl_program *prog)
+ {
+       struct prog_instruction *inst = prog->Instructions;
+       GLuint pos_temp = prog->NumTemporaries;
+
+       /* should do something else if no temps left... */
+       prog->NumTemporaries += 1;
+
+       while (inst->Opcode != OPCODE_END) {
+               if (inst->DstReg.File == PROGRAM_OUTPUT &&
+                   inst->DstReg.Index == VERT_RESULT_HPOS) {
+                       inst->DstReg.File = PROGRAM_TEMPORARY;
+                       inst->DstReg.Index = pos_temp;
+               }
+               inst++;
+       }
+
+       insert_wpos(vp, prog, pos_temp);
+ }
+ /**
+  * Allocate a vertex program variant for the given key and translate it.
+  *
+  * The Mesa program is first rewritten in place where needed (position
+  * transform for position-invariant programs, position-as-texcoord when
+  * \p wpos_idx is non-negative) and then handed to
+  * r300_translate_vertex_shader().
+  *
+  * \param wanted_key  key copied into the new variant
+  * \param mesa_vp     source Mesa vertex program
+  * \param wpos_idx    texcoord slot used for the window position, or -1
+  * \return the newly allocated variant
+  */
+ static struct r300_vertex_program *build_program(struct r300_vertex_program_key *wanted_key,
+                                                struct gl_vertex_program *mesa_vp,
+                                                GLint wpos_idx)
+ {
+       struct gl_program *base = &mesa_vp->Base;
+       struct r300_vertex_program *vp = _mesa_calloc(sizeof(*vp));
+
+       _mesa_memcpy(&vp->key, wanted_key, sizeof(vp->key));
+       vp->wpos_idx = wpos_idx;
+
+       if (mesa_vp->IsPositionInvariant)
+               position_invariant(base);
+
+       if (wpos_idx >= 0)
+               pos_as_texcoord(vp, base);
+
+       assert(base->NumInstructions);
+       vp->num_temporaries = base->NumTemporaries;
+       r300_translate_vertex_shader(vp, base->Instructions);
+
+       return vp;
+ }
+ /**
+  * Select the vertex program variant that matches the current fragment
+  * program, building and caching a new variant when none matches.
+  *
+  * The lookup key is made of the outputs the fragment program needs
+  * (position always, plus COL0/COL1/texcoords according to its InputsRead)
+  * and the inputs read by the vertex program.  When the fragment program
+  * reads WPOS, the first texcoord it does not already read is reserved to
+  * carry the position.  The chosen variant is stored in r300->selected_vp.
+  *
+  * \param r300  driver context
+  */
+ void r300_select_vertex_shader(r300ContextPtr r300)
+ {
+       GLcontext *ctx = r300->radeon.glCtx;
+       GLuint InputsRead;
+       struct r300_vertex_program_key wanted_key = { 0 };
+       GLint i;
+       struct r300_vertex_program_cont *vpc;
+       struct r300_vertex_program *vp;
+       GLint wpos_idx;
+       vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
+       InputsRead = ctx->FragmentProgram._Current->Base.InputsRead;
+       wanted_key.OutputsWritten |= 1 << VERT_RESULT_HPOS;
+       wpos_idx = -1;
+       if (InputsRead & FRAG_BIT_WPOS){
+               /* find the first texcoord the fragment program does not read */
+               for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
+                       if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
+                               break;
+
+               if(i == ctx->Const.MaxTextureUnits){
+                       fprintf(stderr, "\tno free texcoord found\n");
+                       /* fatal: report failure, like the other abort paths in this file */
+                       exit(-1);
+               }
+               InputsRead |= (FRAG_BIT_TEX0 << i);
+               wpos_idx = i;
+       }
+       if (InputsRead & FRAG_BIT_COL0)
+               wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL0;
+       if ((InputsRead & FRAG_BIT_COL1) /*||
+           (InputsRead & FRAG_BIT_FOGC)*/)
+               wanted_key.OutputsWritten |= 1 << VERT_RESULT_COL1;
+
+       for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
+               if (InputsRead & (FRAG_BIT_TEX0 << i))
+                       wanted_key.OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
+       wanted_key.InputsRead = vpc->mesa_program.Base.InputsRead;
+       if(vpc->mesa_program.IsPositionInvariant) {
+               /* a position-invariant program always needs the position input */
+               wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS);
+       }
+       /* reuse a previously built variant with an identical key, if any */
+       for (vp = vpc->progs; vp; vp = vp->next)
+               if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == 0) {
+                       r300->selected_vp = vp;
+                       return ;
+               }
+       //_mesa_print_program(&vpc->mesa_program.Base);
+       /* no match: build a new variant and push it on the front of the list */
+       vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx);
+       vp->next = vpc->progs;
+       vpc->progs = vp;
+       r300->selected_vp = vp;
+ }
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index c67831385f688c96608a2a816ca3e89bb5c8b335,0000000000000000000000000000000000000000..ed479a7f614b29432c9e799fce685e5f6520f783
mode 100644,000000..100644
--- /dev/null
@@@ -1,225 -1,0 +1,239 @@@
 +/*
 + * Mesa 3-D graphics library
 + * Version:  6.5.3
 + *
 + * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the "Software"),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice shall be included
 + * in all copies or substantial portions of the Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 + */
 +
 +
 +#include "glheader.h"
 +#include "imports.h"
 +#include "mtypes.h"
 +#include "prog_instruction.h"
 +
 +
 +/**
 + * Initialize program instruction fields to defaults.
 + * \param inst  first instruction to initialize
 + * \param count  number of instructions to initialize
 + */
 +void
 +_mesa_init_instructions(struct prog_instruction *inst, GLuint count)
 +{
 +   GLuint i;
 +
 +   _mesa_bzero(inst, count * sizeof(struct prog_instruction));
 +
 +   for (i = 0; i < count; i++) {
 +      inst[i].SrcReg[0].File = PROGRAM_UNDEFINED;
 +      inst[i].SrcReg[0].Swizzle = SWIZZLE_NOOP;
 +      inst[i].SrcReg[1].File = PROGRAM_UNDEFINED;
 +      inst[i].SrcReg[1].Swizzle = SWIZZLE_NOOP;
 +      inst[i].SrcReg[2].File = PROGRAM_UNDEFINED;
 +      inst[i].SrcReg[2].Swizzle = SWIZZLE_NOOP;
 +
 +      inst[i].DstReg.File = PROGRAM_UNDEFINED;
 +      inst[i].DstReg.WriteMask = WRITEMASK_XYZW;
 +      inst[i].DstReg.CondMask = COND_TR;
 +      inst[i].DstReg.CondSwizzle = SWIZZLE_NOOP;
 +
 +      inst[i].SaturateMode = SATURATE_OFF;
 +      inst[i].Precision = FLOAT32;
 +   }
 +}
 +
 +
 +/**
 + * Allocate an array of program instructions.
 + * \param numInst  number of instructions
 + * \return pointer to instruction memory
 + */
 +struct prog_instruction *
 +_mesa_alloc_instructions(GLuint numInst)
 +{
 +   return (struct prog_instruction *)
 +      _mesa_calloc(numInst * sizeof(struct prog_instruction));
 +}
 +
 +
 +/**
 + * Reallocate memory storing an array of program instructions.
 + * This is used when we need to append additional instructions onto an
 + * program.
 + * \param oldInst  pointer to first of old/src instructions
 + * \param numOldInst  number of instructions at <oldInst>
 + * \param numNewInst  desired size of new instruction array.
 + * \return  pointer to start of new instruction array.
 + */
 +struct prog_instruction *
 +_mesa_realloc_instructions(struct prog_instruction *oldInst,
 +                           GLuint numOldInst, GLuint numNewInst)
 +{
 +   struct prog_instruction *newInst;
 +
 +   newInst = (struct prog_instruction *)
 +      _mesa_realloc(oldInst,
 +                    numOldInst * sizeof(struct prog_instruction),
 +                    numNewInst * sizeof(struct prog_instruction));
 +
 +   return newInst;
 +}
 +
 +
++/**
++ * Copy a run of program instructions from one array to another.
++ * \param dest  pointer to destination.
++ * \param src  pointer to source.
++ * \param n  number of instructions to copy.
++ * \return pointer to destination.
++ */
++struct prog_instruction *
++_mesa_copy_instructions(struct prog_instruction *dest,
++                        const struct prog_instruction *src, GLuint n)
++{
++   struct prog_instruction *copied;
++
++   copied = (struct prog_instruction *)
++      _mesa_memcpy(dest, src, n * sizeof(*dest));
++   return copied;
++}
++
 +
 +/**
 + * Basic info about each instruction
 + * One entry of the InstInfo lookup table below.
 + */
 +struct instruction_info
 +{
 +   gl_inst_opcode Opcode;   /**< opcode this entry describes */
 +   const char *Name;        /**< name returned by _mesa_opcode_string() */
 +   GLuint NumSrcRegs;       /**< count returned by _mesa_num_inst_src_regs() */
 +};
 +
 +/**
 + * Instruction info
 + * \note Opcode should equal array index!
 + */
 +static const struct instruction_info InstInfo[MAX_OPCODE] = {
 +   { OPCODE_NOP,    "NOP",   0 },
 +   { OPCODE_ABS,    "ABS",   1 },
 +   { OPCODE_ADD,    "ADD",   2 },
 +   { OPCODE_ARA,    "ARA",   1 },
 +   { OPCODE_ARL,    "ARL",   1 },
 +   { OPCODE_ARL_NV, "ARL",   1 },
 +   { OPCODE_ARR,    "ARL",   1 },
 +   { OPCODE_BGNLOOP,"BGNLOOP", 0 },
 +   { OPCODE_BGNSUB, "BGNSUB", 0 },
 +   { OPCODE_BRA,    "BRA",   0 },
 +   { OPCODE_BRK,    "BRK",   0 },
 +   { OPCODE_CAL,    "CAL",   0 },
 +   { OPCODE_CMP,    "CMP",   3 },
 +   { OPCODE_CONT,   "CONT",  1 },
 +   { OPCODE_COS,    "COS",   1 },
 +   { OPCODE_DDX,    "DDX",   1 },
 +   { OPCODE_DDY,    "DDY",   1 },
 +   { OPCODE_DP3,    "DP3",   2 },
 +   { OPCODE_DP4,    "DP4",   2 },
 +   { OPCODE_DPH,    "DPH",   2 },
 +   { OPCODE_DST,    "DST",   2 },
 +   { OPCODE_ELSE,   "ELSE",  0 },
 +   { OPCODE_END,    "END",   0 },
 +   { OPCODE_ENDIF,  "ENDIF", 0 },
 +   { OPCODE_ENDLOOP,"ENDLOOP", 0 },
 +   { OPCODE_ENDSUB, "ENDSUB", 0 },
 +   { OPCODE_EX2,    "EX2",   1 },
 +   { OPCODE_EXP,    "EXP",   1 },
 +   { OPCODE_FLR,    "FLR",   1 },
 +   { OPCODE_FRC,    "FRC",   1 },
 +   { OPCODE_IF,     "IF",    0 },
 +   { OPCODE_INT,    "INT",   1 },
 +   { OPCODE_KIL,    "KIL",   1 },
 +   { OPCODE_KIL_NV, "KIL",   0 },
 +   { OPCODE_LG2,    "LG2",   1 },
 +   { OPCODE_LIT,    "LIT",   1 },
 +   { OPCODE_LOG,    "LOG",   1 },
 +   { OPCODE_LRP,    "LRP",   3 },
 +   { OPCODE_MAD,    "MAD",   3 },
 +   { OPCODE_MAX,    "MAX",   2 },
 +   { OPCODE_MIN,    "MIN",   2 },
 +   { OPCODE_MOV,    "MOV",   1 },
 +   { OPCODE_MUL,    "MUL",   2 },
 +   { OPCODE_NOISE1, "NOISE1", 1 },
 +   { OPCODE_NOISE2, "NOISE2", 1 },
 +   { OPCODE_NOISE3, "NOISE3", 1 },
 +   { OPCODE_NOISE4, "NOISE4", 1 },
 +   { OPCODE_PK2H,   "PK2H",  1 },
 +   { OPCODE_PK2US,  "PK2US", 1 },
 +   { OPCODE_PK4B,   "PK4B",  1 },
 +   { OPCODE_PK4UB,  "PK4UB", 1 },
 +   { OPCODE_POW,    "POW",   2 },
 +   { OPCODE_POPA,   "POPA",  0 },
 +   { OPCODE_PRINT,  "PRINT", 1 },
 +   { OPCODE_PUSHA,  "PUSHA", 0 },
 +   { OPCODE_RCC,    "RCC",   1 },
 +   { OPCODE_RCP,    "RCP",   1 },
 +   { OPCODE_RET,    "RET",   0 },
 +   { OPCODE_RFL,    "RFL",   1 },
 +   { OPCODE_RSQ,    "RSQ",   1 },
 +   { OPCODE_SCS,    "SCS",   1 },
 +   { OPCODE_SEQ,    "SEQ",   2 },
 +   { OPCODE_SFL,    "SFL",   0 },
 +   { OPCODE_SGE,    "SGE",   2 },
 +   { OPCODE_SGT,    "SGT",   2 },
 +   { OPCODE_SIN,    "SIN",   1 },
 +   { OPCODE_SLE,    "SLE",   2 },
 +   { OPCODE_SLT,    "SLT",   2 },
 +   { OPCODE_SNE,    "SNE",   2 },
 +   { OPCODE_SSG,    "SSG",   1 },
 +   { OPCODE_STR,    "STR",   0 },
 +   { OPCODE_SUB,    "SUB",   2 },
 +   { OPCODE_SWZ,    "SWZ",   1 },
 +   { OPCODE_TEX,    "TEX",   1 },
 +   { OPCODE_TXB,    "TXB",   1 },
 +   { OPCODE_TXD,    "TXD",   3 },
 +   { OPCODE_TXL,    "TXL",   1 },
 +   { OPCODE_TXP,    "TXP",   1 },
 +   { OPCODE_TXP_NV, "TXP",   1 },
 +   { OPCODE_UP2H,   "UP2H",  1 },
 +   { OPCODE_UP2US,  "UP2US", 1 },
 +   { OPCODE_UP4B,   "UP4B",  1 },
 +   { OPCODE_UP4UB,  "UP4UB", 1 },
 +   { OPCODE_X2D,    "X2D",   3 },
 +   { OPCODE_XPD,    "XPD",   2 }
 +};
 +
 +
 +/**
 + * Return the number of src registers for the given instruction/opcode.
 + */
 +GLuint
 +_mesa_num_inst_src_regs(gl_inst_opcode opcode)
 +{
 +   ASSERT(opcode == InstInfo[opcode].Opcode);
 +   ASSERT(OPCODE_XPD == InstInfo[OPCODE_XPD].Opcode);
 +   return InstInfo[opcode].NumSrcRegs;
 +}
 +
 +
 +/**
 + * Return string name for given program opcode.
 + */
 +const char *
 +_mesa_opcode_string(gl_inst_opcode opcode)
 +{
 +   ASSERT(opcode < MAX_OPCODE);
 +   return InstInfo[opcode].Name;
 +}
 +
index 14305f17d384bbdef13768c9194251a8a6318511,0000000000000000000000000000000000000000..66abb10cdb8f1930594affcc9dc02a0454ce300c
mode 100644,000000..100644
--- /dev/null
@@@ -1,442 -1,0 +1,446 @@@
 +/*
 + * Mesa 3-D graphics library
 + * Version:  6.5.3
 + *
 + * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
 + *
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the "Software"),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice shall be included
 + * in all copies or substantial portions of the Software.
 + *
 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 + */
 +
 +
 +/**
 + * \file prog_instruction.h
 + *
 + * Vertex/fragment program instruction datatypes and constants.
 + *
 + * \author Brian Paul
 + * \author Keith Whitwell
 + * \author Ian Romanick <idr@us.ibm.com>
 + */
 +
 +
 +#ifndef PROG_INSTRUCTION_H
 +#define PROG_INSTRUCTION_H
 +
 +
 +/**
 + * Swizzle indexes.
 + * Do not change!
 + *
 + * Each index fits in 3 bits.  MAKE_SWIZZLE4() packs four of them into a
 + * 12-bit value (first argument in the lowest bits), GET_SWZ() extracts the
 + * 3-bit index stored at position \c idx of such a value, and GET_BIT()
 + * returns bit \c idx of \c msk.
 + */
 +/*@{*/
 +#define SWIZZLE_X    0
 +#define SWIZZLE_Y    1
 +#define SWIZZLE_Z    2
 +#define SWIZZLE_W    3
 +#define SWIZZLE_ZERO 4   /**< For SWZ instruction only */
 +#define SWIZZLE_ONE  5   /**< For SWZ instruction only */
 +#define SWIZZLE_NIL  7   /**< used during shader code gen (undefined value) */
 +/*@}*/
 +
 +#define MAKE_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9))
 +#define SWIZZLE_NOOP           MAKE_SWIZZLE4(0,1,2,3)
 +#define GET_SWZ(swz, idx)      (((swz) >> ((idx)*3)) & 0x7)
 +#define GET_BIT(msk, idx)      (((msk) >> (idx)) & 0x1)
 +
 +#define SWIZZLE_XXXX MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)
 +#define SWIZZLE_YYYY MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)
 +#define SWIZZLE_ZZZZ MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)
 +#define SWIZZLE_WWWW MAKE_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)
 +
 +
 +/**
 + * Writemask values, 1 bit per component (X = 0x1, Y = 0x2, Z = 0x4,
 + * W = 0x8).  Each multi-component name is the bitwise OR of its components,
 + * so every value fits the 4-bit prog_dst_register::WriteMask field.
 + */
 +/*@{*/
 +#define WRITEMASK_X     0x1
 +#define WRITEMASK_Y     0x2
 +#define WRITEMASK_XY    0x3
 +#define WRITEMASK_Z     0x4
 +#define WRITEMASK_XZ    0x5
 +#define WRITEMASK_YZ    0x6
 +#define WRITEMASK_XYZ   0x7
 +#define WRITEMASK_W     0x8
 +#define WRITEMASK_XW    0x9
 +#define WRITEMASK_YW    0xa
 +#define WRITEMASK_XYW   0xb
 +#define WRITEMASK_ZW    0xc
 +#define WRITEMASK_XZW   0xd
 +#define WRITEMASK_YZW   0xe
 +#define WRITEMASK_XYZW  0xf
 +/*@}*/
 +
 +
 +/**
 + * Condition codes (the values taken by prog_dst_register::CondMask).
 + */
 +/*@{*/
 +#define COND_GT  1  /**< greater than zero */
 +#define COND_EQ  2  /**< equal to zero */
 +#define COND_LT  3  /**< less than zero */
 +#define COND_UN  4  /**< unordered (NaN) */
 +#define COND_GE  5  /**< greater than or equal to zero */
 +#define COND_LE  6  /**< less than or equal to zero */
 +#define COND_NE  7  /**< not equal to zero */
 +#define COND_TR  8  /**< always true */
 +#define COND_FL  9  /**< always false */
 +/*@}*/
 +
 +
 +/**
 + * Instruction precision for GL_NV_fragment_program.
 + * Each value is a distinct bit; together they fit the 3-bit
 + * prog_instruction::Precision field.
 + */
 +/*@{*/
 +#define FLOAT32  0x1
 +#define FLOAT16  0x2
 +#define FIXED12  0x4
 +/*@}*/
 +
 +
 +/**
 + * Saturation modes when storing values.
 + * These are the SATURATE_* tokens held in prog_instruction::SaturateMode,
 + * whose documentation describes saturation to [0,1] or to [-1,1].
 + */
 +/*@{*/
 +#define SATURATE_OFF            0
 +#define SATURATE_ZERO_ONE       1
 +#define SATURATE_PLUS_MINUS_ONE 2
 +/*@}*/
 +
 +
 +/**
 + * Per-component negation masks, one bit per component.
 + * See prog_src_register::NegateBase: for non-SWZ instructions only
 + * NEGATE_XYZW and NEGATE_NONE are possible.
 + */
 +/*@{*/
 +#define NEGATE_X    0x1
 +#define NEGATE_Y    0x2
 +#define NEGATE_Z    0x4
 +#define NEGATE_W    0x8
 +#define NEGATE_XYZW 0xf
 +#define NEGATE_NONE 0x0
 +/*@}*/
 +
 +
 +/**
 + * Program instruction opcodes, for both vertex and fragment programs.
 + * \note changes to this opcode list must be reflected in t_vb_arbprogram.c
 + * \note the InstInfo table read by _mesa_num_inst_src_regs() and
 + * _mesa_opcode_string() is indexed by these values, so it must list the
 + * opcodes in this same order.
 + * \note MAX_OPCODE is not an instruction; it is the number of opcodes and
 + * serves as the upper bound when an opcode is range-checked.
 + */
 +typedef enum prog_opcode {
 +                     /* ARB_vp   ARB_fp   NV_vp   NV_fp     GLSL */
 +                     /*------------------------------------------*/
 +   OPCODE_NOP = 0,   /*                                      X   */
 +   OPCODE_ABS,       /*   X        X       1.1               X   */
 +   OPCODE_ADD,       /*   X        X       X       X         X   */
 +   OPCODE_ARA,       /*                    2                     */
 +   OPCODE_ARL,       /*   X                X                     */
 +   OPCODE_ARL_NV,    /*                    2                     */
 +   OPCODE_ARR,       /*                    2                     */
 +   OPCODE_BGNLOOP,   /*                                     opt  */
 +   OPCODE_BGNSUB,    /*                                     opt  */
 +   OPCODE_BRA,       /*                    2                 X   */
 +   OPCODE_BRK,       /*                    2                opt  */
 +   OPCODE_CAL,       /*                    2       2             */
 +   OPCODE_CMP,       /*            X                             */
 +   OPCODE_CONT,      /*                                     opt  */
 +   OPCODE_COS,       /*            X       2       X         X   */
 +   OPCODE_DDX,       /*                            X         X   */
 +   OPCODE_DDY,       /*                            X         X   */
 +   OPCODE_DP3,       /*   X        X       X       X         X   */
 +   OPCODE_DP4,       /*   X        X       X       X         X   */
 +   OPCODE_DPH,       /*   X        X       1.1                   */
 +   OPCODE_DST,       /*   X        X       X       X             */
 +   OPCODE_ELSE,      /*                                      X   */
 +   OPCODE_END,       /*   X        X       X       X        opt  */
 +   OPCODE_ENDIF,     /*                                     opt  */
 +   OPCODE_ENDLOOP,   /*                                     opt  */
 +   OPCODE_ENDSUB,    /*                                     opt  */
 +   OPCODE_EX2,       /*   X        X       2       X         X   */
 +   OPCODE_EXP,       /*   X                X                 X   */
 +   OPCODE_FLR,       /*   X        X       2       X         X   */
 +   OPCODE_FRC,       /*   X        X       2       X         X   */
 +   OPCODE_IF,        /*                                     opt  */
 +   OPCODE_INT,       /*                                      X   */
 +   OPCODE_KIL,       /*            X                             */
 +   OPCODE_KIL_NV,    /*                            X         X   */
 +   OPCODE_LG2,       /*   X        X       2       X         X   */
 +   OPCODE_LIT,       /*   X        X       X       X             */
 +   OPCODE_LOG,       /*   X                X                 X   */
 +   OPCODE_LRP,       /*            X               X             */
 +   OPCODE_MAD,       /*   X        X       X       X         X   */
 +   OPCODE_MAX,       /*   X        X       X       X         X   */
 +   OPCODE_MIN,       /*   X        X       X       X         X   */
 +   OPCODE_MOV,       /*   X        X       X       X         X   */
 +   OPCODE_MUL,       /*   X        X       X       X         X   */
 +   OPCODE_NOISE1,    /*                                      X   */
 +   OPCODE_NOISE2,    /*                                      X   */
 +   OPCODE_NOISE3,    /*                                      X   */
 +   OPCODE_NOISE4,    /*                                      X   */
 +   OPCODE_PK2H,      /*                            X             */
 +   OPCODE_PK2US,     /*                            X             */
 +   OPCODE_PK4B,      /*                            X             */
 +   OPCODE_PK4UB,     /*                            X             */
 +   OPCODE_POW,       /*   X        X               X         X   */
 +   OPCODE_POPA,      /*                    3                     */
 +   OPCODE_PRINT,     /*                    X       X             */
 +   OPCODE_PUSHA,     /*                    3                     */
 +   OPCODE_RCC,       /*                    1.1                   */
 +   OPCODE_RCP,       /*   X        X       X       X         X   */
 +   OPCODE_RET,       /*                    2       2             */
 +   OPCODE_RFL,       /*            X               X             */
 +   OPCODE_RSQ,       /*   X        X       X       X         X   */
 +   OPCODE_SCS,       /*            X                             */
 +   OPCODE_SEQ,       /*                    2       X         X   */
 +   OPCODE_SFL,       /*                    2       X             */
 +   OPCODE_SGE,       /*   X        X       X       X         X   */
 +   OPCODE_SGT,       /*                    2       X         X   */
 +   OPCODE_SIN,       /*            X       2       X         X   */
 +   OPCODE_SLE,       /*                    2       X         X   */
 +   OPCODE_SLT,       /*   X        X       X       X         X   */
 +   OPCODE_SNE,       /*                    2       X         X   */
 +   OPCODE_SSG,       /*                    2                     */
 +   OPCODE_STR,       /*                    2       X             */
 +   OPCODE_SUB,       /*   X        X       1.1     X         X   */
 +   OPCODE_SWZ,       /*   X        X                             */
 +   OPCODE_TEX,       /*            X       3       X         X   */
 +   OPCODE_TXB,       /*            X       3                 X   */
 +   OPCODE_TXD,       /*                            X         X   */
 +   OPCODE_TXL,       /*                    3       2         X   */
 +   OPCODE_TXP,       /*            X                         X   */
 +   OPCODE_TXP_NV,    /*                    3       X             */
 +   OPCODE_UP2H,      /*                            X             */
 +   OPCODE_UP2US,     /*                            X             */
 +   OPCODE_UP4B,      /*                            X             */
 +   OPCODE_UP4UB,     /*                            X             */
 +   OPCODE_X2D,       /*                            X             */
 +   OPCODE_XPD,       /*   X        X                         X   */
 +   MAX_OPCODE
 +} gl_inst_opcode;
 +
 +
 +/**
 + * Instruction source register.
 + */
 +struct prog_src_register
 +{
 +   GLuint File:4;     /**< One of the PROGRAM_* register file values. */
 +   GLint Index:9;     /**< May be negative for relative addressing. */
 +   GLuint Swizzle:12; /**< Room for four 3-bit SWIZZLE_* indexes, cf. MAKE_SWIZZLE4(). */
 +   GLuint RelAddr:1;  /**< NOTE(review): presumably set when Index is a relative address -- confirm. */
 +
 +   /**
 +    * \name Source register "sign" control.
 +    *
 +    * The ARB and NV extensions allow varying degrees of control over the
 +    * sign of the source vector components.  These values allow enough control
 +    * for all flavors of the extensions.
 +    */
 +   /*@{*/
 +   /**
 +    * Per-component negation for the SWZ instruction.  For non-SWZ
 +    * instructions the only possible values are NEGATE_XYZW and NEGATE_NONE.
 +    *
 +    * \since
 +    * ARB_vertex_program, ARB_fragment_program
 +    */
 +   GLuint NegateBase:4;
 +
 +   /**
 +    * Take the component-wise absolute value.
 +    *
 +    * \since
 +    * NV_fragment_program, NV_fragment_program_option, NV_vertex_program2,
 +    * NV_vertex_program2_option.
 +    */
 +   GLuint Abs:1;
 +
 +   /**
 +    * Post-absolute value negation (all components).
 +    */
 +   GLuint NegateAbs:1;
 +   /*@}*/
 +};
 +
 +
 +/**
 + * Instruction destination register.
 + */
 +struct prog_dst_register
 +{
 +   /**
 +    * One of the PROGRAM_* register file values.
 +    */
 +   GLuint File:4;
 +
 +   GLuint Index:8;
 +   GLuint WriteMask:4; /**< Four bits, wide enough for any WRITEMASK_* value. */
 +
 +   /**
 +    * \name Conditional destination update control.
 +    *
 +    * \since
 +    * NV_fragment_program, NV_fragment_program_option, NV_vertex_program2,
 +    * NV_vertex_program2_option.
 +    */
 +   /*@{*/
 +   /**
 +    * Takes one of the 9 possible condition values (EQ, FL, GT, GE, LE, LT,
 +    * NE, TR, or UN).  Destination update is enabled if the matching
 +    * (swizzled) condition code value passes.  When a conditional update mask
 +    * is not specified, this will be \c COND_TR.
 +    */
 +   GLuint CondMask:4;
 +
 +   /**
 +    * Condition code swizzle value.
 +    */
 +   GLuint CondSwizzle:12;
 +   
 +   /**
 +    * Selects the condition code register to use for conditional destination
 +    * update masking.  In NV_fragment_program or NV_vertex_program2 mode, only
 +    * condition code register 0 is available.  In NV_vertex_program3 mode, 
 +    * condition code registers 0 and 1 are available.
 +    */
 +   GLuint CondSrc:1;
 +   /*@}*/
 +
 +   GLuint pad:31; /**< Unused bits; CondSrc plus pad add up to 32 bits. */
 +};
 +
 +
 +/**
 + * Vertex/fragment program instruction.
 + *
 + * Holds one opcode, up to three source registers and one destination
 + * register, plus the per-instruction modifiers documented on each field.
 + */
 +struct prog_instruction
 +{
 +   gl_inst_opcode Opcode;
 +#if FEATURE_MESA_program_debug
 +   GLshort StringPos;
 +#endif
 +   /**
 +    * Arbitrary data.  Used for the PRINT, CAL, and BRA instructions.
 +    */
 +   void *Data;
 +
 +   struct prog_src_register SrcReg[3];
 +   struct prog_dst_register DstReg;
 +
 +   /**
 +    * Indicates that the instruction should update the condition code
 +    * register.
 +    *
 +    * \since
 +    * NV_fragment_program, NV_fragment_program_option, NV_vertex_program2,
 +    * NV_vertex_program2_option.
 +    */
 +   GLuint CondUpdate:1;
 +
 +   /**
 +    * If prog_instruction::CondUpdate is \c GL_TRUE, this value selects the
 +    * condition code register that is to be updated.
 +    *
 +    * In GL_NV_fragment_program or GL_NV_vertex_program2 mode, only condition
 +    * code register 0 is available.  In GL_NV_vertex_program3 mode, condition
 +    * code registers 0 and 1 are available.
 +    *
 +    * \since
 +    * NV_fragment_program, NV_fragment_program_option, NV_vertex_program2,
 +    * NV_vertex_program2_option.
 +    */
 +   GLuint CondDst:1;
 +
 +   /**
 +    * Saturate each value of the vectored result to the range [0,1] or the
 +    * range [-1,1].  \c SSAT mode (i.e., saturation to the range [-1,1]) is
 +    * only available in NV_fragment_program2 mode.
 +    * Value is one of the SATURATE_* tokens.
 +    *
 +    * \since
 +    * NV_fragment_program, NV_fragment_program_option, NV_vertex_program3.
 +    */
 +   GLuint SaturateMode:2;
 +   
 +   /**
 +    * Per-instruction selectable precision.
 +    *
 +    * \since
 +    * NV_fragment_program, NV_fragment_program_option.
 +    */
 +   GLuint Precision:3;
 +
 +   /**
 +    * \name Texture source controls.
 +    * 
 +    * The texture source controls are only used with the \c TEX, \c TXD,
 +    * \c TXL, and \c TXP instructions.
 +    * NOTE(review): the opcode list also contains TXB and TXP_NV; presumably
 +    * they use these fields as well -- confirm.
 +    *
 +    * \since
 +    * ARB_fragment_program, NV_fragment_program, NV_vertex_program3.
 +    */
 +   /*@{*/
 +   /**
 +    * Source texture unit.  OpenGL supports a maximum of 32 texture
 +    * units.
 +    */
 +   GLuint TexSrcUnit:5;
 +   
 +   /**
 +    * Source texture target, one of TEXTURE_{1D,2D,3D,CUBE,RECT}_INDEX.
 +    */
 +   GLuint TexSrcTarget:3;
 +   /*@}*/
 +
 +   /**
 +    * For BRA and CAL instructions, the location to jump to.
 +    * For BGNLOOP, points to ENDLOOP (and vice-versa).
 +    * For BRK, points to BGNLOOP (which points to ENDLOOP).
 +    * For IF, points to else or endif.
 +    * For ELSE, points to endif.
 +    */
 +   GLint BranchTarget;
 +
 +   /**
 +    * For TEX instructions in shaders, the sampler to use for the
 +    * texture lookup.
 +    */
 +   GLint Sampler;
 +
 +   const char *Comment;
 +};
 +
 +
 +extern void
 +_mesa_init_instructions(struct prog_instruction *inst, GLuint count);
 +
 +extern struct prog_instruction *
 +_mesa_alloc_instructions(GLuint numInst);
 +
 +extern struct prog_instruction *
 +_mesa_realloc_instructions(struct prog_instruction *oldInst,
 +                           GLuint numOldInst, GLuint numNewInst);
 +
++extern struct prog_instruction *
++_mesa_copy_instructions(struct prog_instruction *dest,
++                        const struct prog_instruction *src, GLuint n);
++
 +extern GLuint
 +_mesa_num_inst_src_regs(gl_inst_opcode opcode);
 +
 +extern const char *
 +_mesa_opcode_string(gl_inst_opcode opcode);
 +
 +
 +#endif /* PROG_INSTRUCTION_H */
Simple merge
Simple merge
Simple merge
Simple merge
index f9e5045be71855ac3f01a0bc3614e4165f11f6be,2663d993049ffe77f820c27c680f311a4ed4b389..8b8bb3a173ac74943d74c637385dff5313a82a6e
@@@ -853,7 -850,7 +853,7 @@@ static struct ureg calculate_light_atte
  
  
  /* Need to add some additional parameters to allow lighting in object
-- * space - STATE_SPOT_DIRECTION and STATE_HALF implicitly assume eye
++ * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye
   * space lighting.
   */
  static void build_lighting( struct tnl_program *p )
             */
            VPpli = register_param3(p, STATE_LIGHT, i, 
                                    STATE_POSITION_NORMALIZED); 
-           half = register_param3(p, STATE_LIGHT, i, STATE_HALF_VECTOR);
+             if (p->state->light_local_viewer) {
+                 struct ureg eye_hat = get_eye_position_normalized(p);
+                 half = get_temp(p);
+                 emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
+                 emit_normalize_vec3(p, half, half);
+             } else {
 -                half = register_param3(p, STATE_LIGHT, i, STATE_HALF);
++                half = register_param3(p, STATE_LIGHT, i, STATE_HALF_VECTOR);
+             }
         } 
         else {
            struct ureg Ppli = register_param3(p, STATE_LIGHT, i,