mesa: add & use a new driver flag for UBO updates instead of _NEW_BUFFER_OBJECT
[mesa.git] / src / mesa / drivers / dri / i915 / i915_fragprog.c
index e60157f37772a9d4d0c91aad2d3f5b83c485eb64..930c2b876bc9cb4ba50bebafe00ff05ec817112c 100644 (file)
 #include "main/macros.h"
 #include "main/enums.h"
 
-#include "shader/prog_instruction.h"
-#include "shader/prog_parameter.h"
-#include "shader/program.h"
-#include "shader/programopt.h"
-#include "shader/prog_print.h"
+#include "program/prog_instruction.h"
+#include "program/prog_parameter.h"
+#include "program/program.h"
+#include "program/programopt.h"
+#include "program/prog_print.h"
 
 #include "tnl/tnl.h"
 #include "tnl/t_context.h"
@@ -97,43 +97,43 @@ src_vector(struct i915_fragment_program *p,
       break;
    case PROGRAM_INPUT:
       switch (source->Index) {
-      case FRAG_ATTRIB_WPOS:
+      case VARYING_SLOT_POS:
          src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL);
          break;
-      case FRAG_ATTRIB_COL0:
+      case VARYING_SLOT_COL0:
          src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
          break;
-      case FRAG_ATTRIB_COL1:
+      case VARYING_SLOT_COL1:
          src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
          src = swizzle(src, X, Y, Z, ONE);
          break;
-      case FRAG_ATTRIB_FOGC:
+      case VARYING_SLOT_FOGC:
          src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
          src = swizzle(src, W, ZERO, ZERO, ONE);
          break;
-      case FRAG_ATTRIB_TEX0:
-      case FRAG_ATTRIB_TEX1:
-      case FRAG_ATTRIB_TEX2:
-      case FRAG_ATTRIB_TEX3:
-      case FRAG_ATTRIB_TEX4:
-      case FRAG_ATTRIB_TEX5:
-      case FRAG_ATTRIB_TEX6:
-      case FRAG_ATTRIB_TEX7:
+      case VARYING_SLOT_TEX0:
+      case VARYING_SLOT_TEX1:
+      case VARYING_SLOT_TEX2:
+      case VARYING_SLOT_TEX3:
+      case VARYING_SLOT_TEX4:
+      case VARYING_SLOT_TEX5:
+      case VARYING_SLOT_TEX6:
+      case VARYING_SLOT_TEX7:
          src = i915_emit_decl(p, REG_TYPE_T,
-                              T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0),
+                              T_TEX0 + (source->Index - VARYING_SLOT_TEX0),
                               D0_CHANNEL_ALL);
         break;
 
-      case FRAG_ATTRIB_VAR0:
-      case FRAG_ATTRIB_VAR0 + 1:
-      case FRAG_ATTRIB_VAR0 + 2:
-      case FRAG_ATTRIB_VAR0 + 3:
-      case FRAG_ATTRIB_VAR0 + 4:
-      case FRAG_ATTRIB_VAR0 + 5:
-      case FRAG_ATTRIB_VAR0 + 6:
-      case FRAG_ATTRIB_VAR0 + 7:
+      case VARYING_SLOT_VAR0:
+      case VARYING_SLOT_VAR0 + 1:
+      case VARYING_SLOT_VAR0 + 2:
+      case VARYING_SLOT_VAR0 + 3:
+      case VARYING_SLOT_VAR0 + 4:
+      case VARYING_SLOT_VAR0 + 5:
+      case VARYING_SLOT_VAR0 + 6:
+      case VARYING_SLOT_VAR0 + 7:
          src = i915_emit_decl(p, REG_TYPE_T,
-                              T_TEX0 + (source->Index - FRAG_ATTRIB_VAR0),
+                              T_TEX0 + (source->Index - VARYING_SLOT_VAR0),
                               D0_CHANNEL_ALL);
          break;
 
@@ -143,6 +143,20 @@ src_vector(struct i915_fragment_program *p,
       }
       break;
 
+   case PROGRAM_OUTPUT:
+      switch (source->Index) {
+      case FRAG_RESULT_COLOR:
+        src = UREG(REG_TYPE_OC, 0);
+        break;
+      case FRAG_RESULT_DEPTH:
+        src = UREG(REG_TYPE_OD, 0);
+        break;
+      default:
+        i915_program_error(p, "Bad source->Index: %d", source->Index);
+        return 0;
+      }
+      break;
+
       /* Various paramters and env values.  All emitted to
        * hardware as program constants.
        */
@@ -159,12 +173,9 @@ src_vector(struct i915_fragment_program *p,
 
    case PROGRAM_CONSTANT:
    case PROGRAM_STATE_VAR:
-   case PROGRAM_NAMED_PARAM:
    case PROGRAM_UNIFORM:
-      src =
-         i915_emit_param4fv(p,
-                            program->Base.Parameters->ParameterValues[source->
-                                                                      Index]);
+      src = i915_emit_param4fv(p,
+        &program->Base.Parameters->ParameterValues[source->Index][0].f);
       break;
 
    default:
@@ -196,6 +207,7 @@ get_result_vector(struct i915_fragment_program *p,
    case PROGRAM_OUTPUT:
       switch (inst->DstReg.Index) {
       case FRAG_RESULT_COLOR:
+      case FRAG_RESULT_DATA0:
          return UREG(REG_TYPE_OC, 0);
       case FRAG_RESULT_DEPTH:
          p->depth_written = 1;
@@ -255,7 +267,7 @@ translate_tex_src_target(struct i915_fragment_program *p, GLubyte bit)
 #define EMIT_TEX( OP )                                         \
 do {                                                           \
    GLuint dim = translate_tex_src_target( p, inst->TexSrcTarget );     \
-   const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current; \
+   const struct gl_fragment_program *program = &p->FragProg;   \
    GLuint unit = program->Base.SamplerUnits[inst->TexSrcUnit]; \
    GLuint sampler = i915_emit_decl(p, REG_TYPE_S,              \
                                   unit, dim);                  \
@@ -288,10 +300,11 @@ do {                                                                      \
 /* 
  * TODO: consider moving this into core 
  */
-static void calc_live_regs( struct i915_fragment_program *p )
+static bool calc_live_regs( struct i915_fragment_program *p )
 {
-    const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current;
-    GLuint regsUsed = 0xffff0000;
+    const struct gl_fragment_program *program = &p->FragProg;
+    GLuint regsUsed = ~((1 << I915_MAX_TEMPORARY) - 1);
+    uint8_t live_components[I915_MAX_TEMPORARY] = { 0, };
     GLint i;
    
     for (i = program->Base.NumInstructions - 1; i >= 0; i--) {
@@ -300,23 +313,44 @@ static void calc_live_regs( struct i915_fragment_program *p )
         int a;
 
         /* Register is written to: unmark as live for this and preceeding ops */ 
-        if (inst->DstReg.File == PROGRAM_TEMPORARY)
-            regsUsed &= ~(1 << inst->DstReg.Index);
+        if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+           if (inst->DstReg.Index >= I915_MAX_TEMPORARY)
+              return false;
+
+            live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask;
+            if (live_components[inst->DstReg.Index] == 0)
+                regsUsed &= ~(1 << inst->DstReg.Index);
+        }
 
         for (a = 0; a < opArgs; a++) {
             /* Register is read from: mark as live for this and preceeding ops */ 
-            if (inst->SrcReg[a].File == PROGRAM_TEMPORARY)
+            if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) {
+                unsigned c;
+
+               if (inst->SrcReg[a].Index >= I915_MAX_TEMPORARY)
+                  return false;
+
                 regsUsed |= 1 << inst->SrcReg[a].Index;
+
+                for (c = 0; c < 4; c++) {
+                    const unsigned field = GET_SWZ(inst->SrcReg[a].Swizzle, c);
+
+                    if (field <= SWIZZLE_W)
+                        live_components[inst->SrcReg[a].Index] |= (1U << field);
+                }
+            }
         }
 
         p->usedRegs[i] = regsUsed;
     }
+
+    return true;
 }
 
 static GLuint get_live_regs( struct i915_fragment_program *p, 
                              const struct prog_instruction *inst )
 {
-    const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current;
+    const struct gl_fragment_program *program = &p->FragProg;
     GLuint nr = inst - program->Base.Instructions;
 
     return p->usedRegs[nr];
@@ -337,8 +371,7 @@ static GLuint get_live_regs( struct i915_fragment_program *p,
 static void
 upload_program(struct i915_fragment_program *p)
 {
-   const struct gl_fragment_program *program =
-      p->ctx->FragmentProgram._Current;
+   const struct gl_fragment_program *program = &p->FragProg;
    const struct prog_instruction *inst = program->Base.Instructions;
 
    if (INTEL_DEBUG & DEBUG_WM)
@@ -366,7 +399,10 @@ upload_program(struct i915_fragment_program *p)
 
    /* Not always needed:
     */
-   calc_live_regs(p);
+   if (!calc_live_regs(p)) {
+      i915_program_error(p, "Could not allocate registers");
+      return;
+   }
 
    while (1) {
       GLuint src0, src1, src2, flags;
@@ -472,6 +508,18 @@ upload_program(struct i915_fragment_program *p)
                         swizzle(tmp, X, X, X, X));
          break;
 
+      case OPCODE_DP2:
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         src1 = src_vector(p, &inst->SrcReg[1], program);
+        i915_emit_arith(p,
+                        A0_DP3,
+                         get_result_vector(p, inst),
+                         get_result_flags(inst), 0,
+                        swizzle(src0, X, Y, ZERO, ZERO),
+                        swizzle(src1, X, Y, ZERO, ZERO),
+                        0);
+         break;
+
       case OPCODE_DP3:
          EMIT_2ARG_ARITH(A0_DP3);
          break;
@@ -543,10 +591,14 @@ upload_program(struct i915_fragment_program *p)
         if (inst->DstReg.CondMask == COND_TR) {
            tmp = i915_get_utemp(p);
 
+           /* The KIL instruction discards the fragment if any component of
+            * the source is < 0.  Emit an immediate operand of {-1}.xywz.
+            */
            i915_emit_texld(p, get_live_regs(p, inst),
                            tmp, A0_DEST_CHANNEL_ALL,
                            0, /* use a dummy dest reg */
-                           swizzle(tmp, ONE, ONE, ONE, ONE), /* always */
+                           negate(swizzle(tmp, ONE, ONE, ONE, ONE),
+                                  1, 1, 1, 1),
                            T0_TEXKILL);
         } else {
            p->error = 1;
@@ -654,21 +706,6 @@ upload_program(struct i915_fragment_program *p)
          EMIT_2ARG_ARITH(A0_MUL);
          break;
 
-      case OPCODE_NOISE1:
-      case OPCODE_NOISE2:
-      case OPCODE_NOISE3:
-      case OPCODE_NOISE4:
-        /* Don't implement noise because we just don't have the instructions
-         * to spare.  We aren't the first vendor to do so.
-         */
-        i915_program_error(p, "Stubbed-out noise functions");
-        i915_emit_arith(p,
-                        A0_MOV,
-                        get_result_vector(p, inst),
-                        get_result_flags(inst), 0,
-                        swizzle(tmp, ZERO, ZERO, ZERO, ZERO), 0, 0);
-        break;
-
       case OPCODE_POW:
          src0 = src_vector(p, &inst->SrcReg[0], program);
          src1 = src_vector(p, &inst->SrcReg[1], program);
@@ -780,18 +817,18 @@ upload_program(struct i915_fragment_program *p)
         flags = get_result_flags(inst);
         dst = get_result_vector(p, inst);
 
-        /* dst = src1 >= src2 */
+        /* tmp = src1 >= src2 */
         i915_emit_arith(p,
                         A0_SGE,
-                        dst,
+                        tmp,
                         flags, 0,
                         src_vector(p, &inst->SrcReg[0], program),
                         src_vector(p, &inst->SrcReg[1], program),
                         0);
-        /* tmp = src1 <= src2 */
+        /* dst = src1 <= src2 */
         i915_emit_arith(p,
                         A0_SGE,
-                        tmp,
+                        dst,
                         flags, 0,
                         negate(src_vector(p, &inst->SrcReg[0], program),
                                1, 1, 1, 1),
@@ -929,18 +966,18 @@ upload_program(struct i915_fragment_program *p)
         flags = get_result_flags(inst);
         dst = get_result_vector(p, inst);
 
-        /* dst = src1 < src2 */
+        /* tmp = src1 < src2 */
         i915_emit_arith(p,
                         A0_SLT,
-                        dst,
+                        tmp,
                         flags, 0,
                         src_vector(p, &inst->SrcReg[0], program),
                         src_vector(p, &inst->SrcReg[1], program),
                         0);
-        /* tmp = src1 > src2 */
+        /* dst = src1 > src2 */
         i915_emit_arith(p,
                         A0_SLT,
-                        tmp,
+                        dst,
                         flags, 0,
                         negate(src_vector(p, &inst->SrcReg[0], program),
                                1, 1, 1, 1),
@@ -957,6 +994,41 @@ upload_program(struct i915_fragment_program *p)
                         0);
          break;
 
+      case OPCODE_SSG:
+        dst = get_result_vector(p, inst);
+        flags = get_result_flags(inst);
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+        tmp = i915_get_utemp(p);
+
+        /* tmp = (src < 0.0) */
+        i915_emit_arith(p,
+                        A0_SLT,
+                        tmp,
+                        flags, 0,
+                        src0,
+                        swizzle(src0, ZERO, ZERO, ZERO, ZERO),
+                        0);
+
+        /* dst = (0.0 < src) */
+        i915_emit_arith(p,
+                        A0_SLT,
+                        dst,
+                        flags, 0,
+                        swizzle(src0, ZERO, ZERO, ZERO, ZERO),
+                        src0,
+                        0);
+
+        /* dst = (src > 0.0) - (src < 0.0) */
+        i915_emit_arith(p,
+                        A0_ADD,
+                        dst,
+                        flags, 0,
+                        dst,
+                        negate(tmp, 1, 1, 1, 1),
+                        0);
+
+         break;
+
       case OPCODE_SUB:
          src0 = src_vector(p, &inst->SrcReg[0], program);
          src1 = src_vector(p, &inst->SrcReg[1], program);
@@ -1015,7 +1087,6 @@ upload_program(struct i915_fragment_program *p)
 
       case OPCODE_BGNLOOP:
       case OPCODE_BGNSUB:
-      case OPCODE_BRA:
       case OPCODE_BRK:
       case OPCODE_CAL:
       case OPCODE_CONT:
@@ -1075,21 +1146,21 @@ fixup_depth_write(struct i915_fragment_program *p)
 static void
 check_wpos(struct i915_fragment_program *p)
 {
-   GLuint inputs = p->FragProg.Base.InputsRead;
+   GLbitfield64 inputs = p->FragProg.Base.InputsRead;
    GLint i;
 
    p->wpos_tex = -1;
 
    for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
-      if (inputs & (FRAG_BIT_TEX(i) | FRAG_BIT_VAR(i)))
+      if (inputs & (VARYING_BIT_TEX(i) | VARYING_BIT_VAR(i)))
          continue;
-      else if (inputs & FRAG_BIT_WPOS) {
+      else if (inputs & VARYING_BIT_POS) {
          p->wpos_tex = i;
-         inputs &= ~FRAG_BIT_WPOS;
+         inputs &= ~VARYING_BIT_POS;
       }
    }
 
-   if (inputs & FRAG_BIT_WPOS) {
+   if (inputs & VARYING_BIT_POS) {
       i915_program_error(p, "No free texcoord for wpos value");
    }
 }
@@ -1102,7 +1173,7 @@ translate_program(struct i915_fragment_program *p)
 
    if (INTEL_DEBUG & DEBUG_WM) {
       printf("fp:\n");
-      _mesa_print_program(&p->ctx->FragmentProgram._Current->Base);
+      _mesa_print_program(&p->FragProg.Base);
       printf("\n");
    }
 
@@ -1112,11 +1183,6 @@ translate_program(struct i915_fragment_program *p)
    fixup_depth_write(p);
    i915_fini_program(p);
 
-   if (INTEL_DEBUG & DEBUG_WM) {
-      printf("i915:\n");
-      i915_disassemble_program(i915->state.Program, i915->state.ProgramSize);
-   }
-
    p->translated = 1;
 }
 
@@ -1140,7 +1206,7 @@ track_params(struct i915_fragment_program *p)
 
 
 static void
-i915BindProgram(GLcontext * ctx, GLenum target, struct gl_program *prog)
+i915BindProgram(struct gl_context * ctx, GLenum target, struct gl_program *prog)
 {
    if (target == GL_FRAGMENT_PROGRAM_ARB) {
       struct i915_context *i915 = I915_CONTEXT(ctx);
@@ -1163,7 +1229,7 @@ i915BindProgram(GLcontext * ctx, GLenum target, struct gl_program *prog)
 }
 
 static struct gl_program *
-i915NewProgram(GLcontext * ctx, GLenum target, GLuint id)
+i915NewProgram(struct gl_context * ctx, GLenum target, GLuint id)
 {
    switch (target) {
    case GL_VERTEX_PROGRAM_ARB:
@@ -1191,7 +1257,7 @@ i915NewProgram(GLcontext * ctx, GLenum target, GLuint id)
 }
 
 static void
-i915DeleteProgram(GLcontext * ctx, struct gl_program *prog)
+i915DeleteProgram(struct gl_context * ctx, struct gl_program *prog)
 {
    if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
       struct i915_context *i915 = I915_CONTEXT(ctx);
@@ -1206,7 +1272,7 @@ i915DeleteProgram(GLcontext * ctx, struct gl_program *prog)
 
 
 static GLboolean
-i915IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
+i915IsProgramNative(struct gl_context * ctx, GLenum target, struct gl_program *prog)
 {
    if (target == GL_FRAGMENT_PROGRAM_ARB) {
       struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
@@ -1217,35 +1283,33 @@ i915IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
       return !p->error;
    }
    else
-      return GL_TRUE;
+      return true;
 }
 
 static GLboolean
-i915ProgramStringNotify(GLcontext * ctx,
+i915ProgramStringNotify(struct gl_context * ctx,
                         GLenum target, struct gl_program *prog)
 {
    if (target == GL_FRAGMENT_PROGRAM_ARB) {
       struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
       p->translated = 0;
-
-      /* Hack: make sure fog is correctly enabled according to this
-       * fragment program's fog options.
-       */
-      if (p->FragProg.FogOption) {
-         /* add extra instructions to do fog, then turn off FogOption field */
-         _mesa_append_fog_code(ctx, &p->FragProg);
-         p->FragProg.FogOption = GL_NONE;
-      }
    }
 
    (void) _tnl_program_string(ctx, target, prog);
 
    /* XXX check if program is legal, within limits */
-   return GL_TRUE;
+   return true;
+}
+
+static void
+i915SamplerUniformChange(struct gl_context *ctx,
+                         GLenum target, struct gl_program *prog)
+{
+   i915ProgramStringNotify(ctx, target, prog);
 }
 
 void
-i915_update_program(GLcontext *ctx)
+i915_update_program(struct gl_context *ctx)
 {
    struct intel_context *intel = intel_context(ctx);
    struct i915_context *i915 = i915_context(&intel->ctx);
@@ -1270,7 +1334,7 @@ i915_update_program(GLcontext *ctx)
 void
 i915ValidateFragmentProgram(struct i915_context *i915)
 {
-   GLcontext *ctx = &i915->intel.ctx;
+   struct gl_context *ctx = &i915->intel.ctx;
    struct intel_context *intel = intel_context(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *VB = &tnl->vb;
@@ -1278,7 +1342,7 @@ i915ValidateFragmentProgram(struct i915_context *i915)
    struct i915_fragment_program *p =
       (struct i915_fragment_program *) ctx->FragmentProgram._Current;
 
-   const GLuint inputsRead = p->FragProg.Base.InputsRead;
+   const GLbitfield64 inputsRead = p->FragProg.Base.InputsRead;
    GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
    GLuint s2 = S2_TEXCOORD_NONE;
    int i, offset = 0;
@@ -1292,33 +1356,36 @@ i915ValidateFragmentProgram(struct i915_context *i915)
 
    intel->vertex_attr_count = 0;
    intel->wpos_offset = 0;
-   intel->wpos_size = 0;
    intel->coloroffset = 0;
    intel->specoffset = 0;
 
-   if (inputsRead & FRAG_BITS_TEX_ANY) {
+   if (inputsRead & VARYING_BITS_TEX_ANY || p->wpos_tex != -1) {
       EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16);
    }
    else {
       EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12);
    }
 
-   if (inputsRead & FRAG_BIT_COL0) {
+   /* Handle gl_PointSize builtin var here */
+   if (ctx->Point._Attenuated || ctx->VertexProgram.PointSizeEnabled)
+      EMIT_ATTR(_TNL_ATTRIB_POINTSIZE, EMIT_1F, S4_VFMT_POINT_WIDTH, 4);
+
+   if (inputsRead & VARYING_BIT_COL0) {
       intel->coloroffset = offset / 4;
       EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4);
    }
 
-   if (inputsRead & FRAG_BIT_COL1) {
+   if (inputsRead & VARYING_BIT_COL1) {
        intel->specoffset = offset / 4;
        EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_4UB_4F_BGRA, S4_VFMT_SPEC_FOG, 4);
    }
 
-   if ((inputsRead & FRAG_BIT_FOGC) || i915->vertex_fog != I915_FOG_NONE) {
+   if ((inputsRead & VARYING_BIT_FOGC)) {
       EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4);
    }
 
    for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
-      if (inputsRead & FRAG_BIT_TEX(i)) {
+      if (inputsRead & VARYING_BIT_TEX(i)) {
          int sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;
 
          s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
@@ -1326,7 +1393,7 @@ i915ValidateFragmentProgram(struct i915_context *i915)
 
          EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4);
       }
-      else if (inputsRead & FRAG_BIT_VAR(i)) {
+      else if (inputsRead & VARYING_BIT_VAR(i)) {
          int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size;
 
          s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
@@ -1335,17 +1402,15 @@ i915ValidateFragmentProgram(struct i915_context *i915)
          EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4);
       }
       else if (i == p->wpos_tex) {
-
+        int wpos_size = 4 * sizeof(float);
          /* If WPOS is required, duplicate the XYZ position data in an
           * unused texture coordinate:
           */
          s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
-         s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(3));
+         s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(wpos_size));
 
          intel->wpos_offset = offset;
-         intel->wpos_size = 3 * sizeof(GLuint);
-
-         EMIT_PAD(intel->wpos_size);
+         EMIT_PAD(wpos_size);
       }
    }
 
@@ -1363,6 +1428,10 @@ i915ValidateFragmentProgram(struct i915_context *i915)
                                               intel->vertex_attr_count,
                                               intel->ViewportMatrix.m, 0);
 
+      assert(intel->prim.current_offset == intel->prim.start_offset);
+      intel->prim.start_offset = (intel->prim.current_offset + intel->vertex_size-1) / intel->vertex_size * intel->vertex_size;
+      intel->prim.current_offset = intel->prim.start_offset;
+
       intel->vertex_size >>= 2;
 
       i915->state.Ctx[I915_CTXREG_LIS2] = s2;
@@ -1377,6 +1446,11 @@ i915ValidateFragmentProgram(struct i915_context *i915)
 
    if (!p->on_hardware)
       i915_upload_program(i915, p);
+
+   if (INTEL_DEBUG & DEBUG_WM) {
+      printf("i915:\n");
+      i915_disassemble_program(i915->state.Program, i915->state.ProgramSize);
+   }
 }
 
 void
@@ -1387,4 +1461,5 @@ i915InitFragProgFuncs(struct dd_function_table *functions)
    functions->DeleteProgram = i915DeleteProgram;
    functions->IsProgramNative = i915IsProgramNative;
    functions->ProgramStringNotify = i915ProgramStringNotify;
+   functions->SamplerUniformChange = i915SamplerUniformChange;
 }