galahad: do map/unmap counting for resources
[mesa.git] / src / gallium / drivers / i965 / brw_wm_fp.c
index 5f47d86f717c33a169404bb7a5a45de47d378519..f7ee55cc1c85b03b3a37276b0155a726fe978d2a 100644 (file)
   */
                
 
-#include "pipe/p_shader_constants.h"
+#include "pipe/p_shader_tokens.h"
 
-#include "brw_context.h"
-#include "brw_wm.h"
-#include "brw_util.h"
-
-
-#define X    0
-#define Y    1
-#define Z    2
-#define W    3
+#include "util/u_math.h"
+#include "util/u_memory.h"
 
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_util.h"
 
-static const char *wm_opcode_strings[] = {   
-   "PIXELXY",
-   "DELTAXY",
-   "PIXELW",
-   "LINTERP",
-   "PINTERP",
-   "CINTERP",
-   "WPOSXY",
-   "FB_WRITE",
-   "FRONTFACING",
-};
-
+#include "brw_wm.h"
+#include "brw_debug.h"
 
 
 /***********************************************************************
  * Source regs
  */
 
-static struct prog_src_register src_reg(GLuint file, GLuint idx)
+static struct brw_fp_src src_reg(GLuint file, GLuint idx)
 {
-   struct prog_src_register reg;
-   reg.File = file;
-   reg.Index = idx;
-   reg.Swizzle = SWIZZLE_NOOP;
-   reg.RelAddr = 0;
-   reg.Negate = NEGATE_NONE;
-   reg.Abs = 0;
+   struct brw_fp_src reg;
+   reg.file = file;
+   reg.index = idx;
+   reg.swizzle = BRW_SWIZZLE_XYZW;
+   reg.indirect = 0;
+   reg.negate = 0;
+   reg.abs = 0;
    return reg;
 }
 
-static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
+static struct brw_fp_src src_reg_from_dst(struct brw_fp_dst dst)
 {
-   return src_reg(dst.File, dst.Index);
+   return src_reg(dst.file, dst.index);
 }
 
-static struct prog_src_register src_undef( void )
+static struct brw_fp_src src_undef( void )
 {
-   return src_reg(PROGRAM_UNDEFINED, 0);
+   return src_reg(TGSI_FILE_NULL, 0);
 }
 
-static GLboolean src_is_undef(struct prog_src_register src)
+static GLboolean src_is_undef(struct brw_fp_src src)
 {
-   return src.File == PROGRAM_UNDEFINED;
+   return src.file == TGSI_FILE_NULL;
 }
 
-static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
+static struct brw_fp_src src_swizzle( struct brw_fp_src reg, int x, int y, int z, int w )
 {
-   reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
+   unsigned swz = reg.swizzle;
+
+   reg.swizzle = ( BRW_GET_SWZ(swz, x) << 0 |
+                  BRW_GET_SWZ(swz, y) << 2 |
+                  BRW_GET_SWZ(swz, z) << 4 |
+                  BRW_GET_SWZ(swz, w) << 6 );
+
    return reg;
 }
 
-static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
+static struct brw_fp_src src_scalar( struct brw_fp_src reg, int x )
 {
    return src_swizzle(reg, x, x, x, x);
 }
 
-static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
+static struct brw_fp_src src_abs( struct brw_fp_src src )
 {
-   reg.Swizzle = swizzle;
-   return reg;
+   src.negate = 0;
+   src.abs = 1;
+   return src;
+}
+
+static struct brw_fp_src src_negate( struct brw_fp_src src )
+{
+   src.negate = 1;
+   src.abs = 0;
+   return src;
+}
+
+
+static int match_or_expand_immediate( const float *v,
+                                      unsigned nr,
+                                      float *v2,
+                                      unsigned *nr2,
+                                      unsigned *swizzle )
+{
+   unsigned i, j;
+   
+   *swizzle = 0;
+
+   for (i = 0; i < nr; i++) {
+      boolean found = FALSE;
+
+      for (j = 0; j < *nr2 && !found; j++) {
+         if (v[i] == v2[j]) {
+            *swizzle |= j << (i * 2);
+            found = TRUE;
+         }
+      }
+
+      if (!found) {
+         if (*nr2 >= 4) 
+            return FALSE;
+
+         v2[*nr2] = v[i];
+         *swizzle |= *nr2 << (i * 2);
+         (*nr2)++;
+      }
+   }
+
+   return TRUE;
+}
+
+
+
+/* Internally generated immediates: overkill...
+ */
+static struct brw_fp_src src_imm( struct brw_wm_compile *c, 
+                                 const GLfloat *v, 
+                                 unsigned nr)
+{
+   unsigned i, j;
+   unsigned swizzle;
+
+   /* Could do a first pass where we examine all existing immediates
+    * without expanding.
+    */
+
+   for (i = 0; i < c->nr_immediates; i++) {
+      if (match_or_expand_immediate( v, 
+                                     nr,
+                                     c->immediate[i].v,
+                                     &c->immediate[i].nr, 
+                                     &swizzle ))
+         goto out;
+   }
+
+   if (c->nr_immediates < Elements(c->immediate)) {
+      i = c->nr_immediates++;
+      if (match_or_expand_immediate( v,
+                                     nr,
+                                     c->immediate[i].v,
+                                     &c->immediate[i].nr, 
+                                     &swizzle ))
+         goto out;
+   }
+
+   c->error = 1;
+   return src_undef();
+
+out:
+   /* Make sure that all referenced elements are from this immediate.
+    * Has the effect of making size-one immediates into scalars.
+    */
+   for (j = nr; j < 4; j++)
+      swizzle |= (swizzle & 0x3) << (j * 2);
+
+   return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE, i ),
+                      BRW_GET_SWZ(swizzle, X),
+                      BRW_GET_SWZ(swizzle, Y),
+                      BRW_GET_SWZ(swizzle, Z),
+                      BRW_GET_SWZ(swizzle, W) );
 }
 
 
+
+static struct brw_fp_src src_imm1f( struct brw_wm_compile *c,
+                                   GLfloat f )
+{
+   return src_imm(c, &f, 1);
+}
+
+static struct brw_fp_src src_imm4f( struct brw_wm_compile *c,
+                                   GLfloat x,
+                                   GLfloat y,
+                                   GLfloat z,
+                                   GLfloat w)
+{
+   GLfloat f[4] = {x,y,z,w};
+   return src_imm(c, f, 4);
+}
+
+
+
 /***********************************************************************
  * Dest regs
  */
 
-static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
+static struct brw_fp_dst dst_reg(GLuint file, GLuint idx)
 {
-   struct prog_dst_register reg;
-   reg.File = file;
-   reg.Index = idx;
-   reg.WriteMask = WRITEMASK_XYZW;
-   reg.RelAddr = 0;
-   reg.CondMask = COND_TR;
-   reg.CondSwizzle = 0;
-   reg.CondSrc = 0;
-   reg.pad = 0;
+   struct brw_fp_dst reg;
+   reg.file = file;
+   reg.index = idx;
+   reg.writemask = BRW_WRITEMASK_XYZW;
+   reg.indirect = 0;
+   reg.saturate = 0;
    return reg;
 }
 
-static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
+static struct brw_fp_dst dst_mask( struct brw_fp_dst reg, int mask )
 {
-   reg.WriteMask &= mask;
+   reg.writemask &= mask;
    return reg;
 }
 
-static struct prog_dst_register dst_undef( void )
+static struct brw_fp_dst dst_undef( void )
 {
-   return dst_reg(PROGRAM_UNDEFINED, 0);
+   return dst_reg(TGSI_FILE_NULL, 0);
 }
 
+static boolean dst_is_undef( struct brw_fp_dst dst )
+{
+   return dst.file == TGSI_FILE_NULL;
+}
 
+static struct brw_fp_dst dst_saturate( struct brw_fp_dst reg, boolean flag )
+{
+   reg.saturate = flag;
+   return reg;
+}
 
-static struct prog_dst_register get_temp( struct brw_wm_compile *c )
+/* Hand out one internal temporary, tracked as a bit in c->fp_temp.
+ * When ffs() finds no clear bit the compile is flagged as failed and an
+ * undefined destination is returned; falling through with bit == 0
+ * would shift by -1 and return a register one below
+ * fp_first_internal_temp.
+ */
+static struct brw_fp_dst get_temp( struct brw_wm_compile *c )
 {
-   int bit = _mesa_ffs( ~c->fp_temp );
+   int bit = ffs( ~c->fp_temp );
 
    if (!bit) {
-      _mesa_printf("%s: out of temporaries\n", __FILE__);
-      exit(1);
+      debug_printf("%s: out of temporaries\n", __FILE__);
+      c->error = 1;
+      return dst_undef();
    }
 
    c->fp_temp |= 1<<(bit-1);
-   return dst_reg(PROGRAM_TEMPORARY, c->first_internal_temp+(bit-1));
+   return dst_reg(TGSI_FILE_TEMPORARY, c->fp_first_internal_temp+(bit-1));
 }
 
 
-static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
+/* Return a temporary obtained from get_temp() by clearing its bit in
+ * c->fp_temp.
+ */
+static void release_temp( struct brw_wm_compile *c, struct brw_fp_dst temp )
 {
-   c->fp_temp &= ~(1 << (temp.Index - c->first_internal_temp));
+   /* A failed get_temp() returns an undefined dst that owns no bit. */
+   if (dst_is_undef(temp))
+      return;
+
+   c->fp_temp &= ~(1 << (temp.index - c->fp_first_internal_temp));
 }
 
 
@@ -161,73 +271,107 @@ static void release_temp( struct brw_wm_compile *c, struct prog_dst_register tem
  * Instructions 
  */
 
-static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
+static struct brw_fp_instruction *get_fp_inst(struct brw_wm_compile *c)
 {
-   return &c->prog_instructions[c->nr_fp_insns++];
+   return &c->fp_instructions[c->nr_fp_insns++];
 }
 
-static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
-                                       const struct prog_instruction *inst0)
+static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c,
+                                            GLuint op,
+                                            struct brw_fp_dst dest,
+                                            GLuint tex_unit,
+                                            GLuint target,
+                                            GLuint sampler,
+                                            struct brw_fp_src src0,
+                                            struct brw_fp_src src1,
+                                            struct brw_fp_src src2 )
 {
-   struct prog_instruction *inst = get_fp_inst(c);
-   *inst = *inst0;
+   struct brw_fp_instruction *inst = get_fp_inst(c);
+
+   if (tex_unit || target)
+      assert(op == TGSI_OPCODE_TXP ||
+             op == TGSI_OPCODE_TXB ||
+             op == TGSI_OPCODE_TEX ||
+             op == WM_FB_WRITE);
+
+   inst->opcode = op;
+   inst->dst = dest;
+   inst->tex_unit = tex_unit;
+   inst->target = target;
+   inst->sampler = sampler;
+   inst->src[0] = src0;
+   inst->src[1] = src1;
+   inst->src[2] = src2;
+
    return inst;
 }
+   
 
-static struct prog_instruction * emit_op(struct brw_wm_compile *c,
-                                        GLuint op,
-                                        struct prog_dst_register dest,
-                                        GLuint saturate,
-                                        struct prog_src_register src0,
-                                        struct prog_src_register src1,
-                                        struct prog_src_register src2 )
+static INLINE void emit_op3(struct brw_wm_compile *c,
+                           GLuint op,
+                           struct brw_fp_dst dest,
+                           struct brw_fp_src src0,
+                           struct brw_fp_src src1,
+                           struct brw_fp_src src2 )
 {
-   struct prog_instruction *inst = get_fp_inst(c);
-      
-   memset(inst, 0, sizeof(*inst));
-
-   inst->Opcode = op;
-   inst->DstReg = dest;
-   inst->SaturateMode = saturate;   
-   inst->SrcReg[0] = src0;
-   inst->SrcReg[1] = src1;
-   inst->SrcReg[2] = src2;
-   return inst;
+   emit_tex_op(c, op, dest, 0, 0, 0, src0, src1, src2);
+}
+
+
+static INLINE void emit_op2(struct brw_wm_compile *c,
+                           GLuint op,
+                           struct brw_fp_dst dest,
+                           struct brw_fp_src src0,
+                           struct brw_fp_src src1)
+{
+   emit_tex_op(c, op, dest, 0, 0, 0, src0, src1, src_undef());
+}
+
+static INLINE void emit_op1(struct brw_wm_compile *c,
+                           GLuint op,
+                           struct brw_fp_dst dest,
+                           struct brw_fp_src src0)
+{
+   emit_tex_op(c, op, dest, 0, 0, 0, src0, src_undef(), src_undef());
+}
+
+static INLINE void emit_op0(struct brw_wm_compile *c,
+                          GLuint op,
+                          struct brw_fp_dst dest)
+{
+   emit_tex_op(c, op, dest, 0, 0, 0, src_undef(), src_undef(), src_undef());
 }
 
 
+
 /* Many opcodes produce the same value across all the result channels.
  * We'd rather not have to support that splatting in the opcode implementations,
  * and brw_wm_pass*.c wants to optimize them out by shuffling references around
  * anyway.  We can easily get both by emitting the opcode to one channel, and
  * then MOVing it to the others, which brw_wm_pass*.c already understands.
  */
-static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
-                                                const struct prog_instruction *inst0)
+static void emit_scalar_insn(struct brw_wm_compile *c,
+                            unsigned opcode,
+                            struct brw_fp_dst dst,
+                            struct brw_fp_src src0,
+                            struct brw_fp_src src1,
+                            struct brw_fp_src src2 )
 {
-   struct prog_instruction *inst;
-   unsigned int dst_chan;
-   unsigned int other_channel_mask;
-
-   if (inst0->DstReg.WriteMask == 0)
-      return NULL;
-
-   dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1;
-   inst = get_fp_inst(c);
-   *inst = *inst0;
-   inst->DstReg.WriteMask = 1 << dst_chan;
-
-   other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
-   if (other_channel_mask != 0) {
-      inst = emit_op(c,
-                    TGSI_OPCODE_MOV,
-                    dst_mask(inst0->DstReg, other_channel_mask),
-                    0,
-                    src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
-                    src_undef(),
-                    src_undef());
+   unsigned first_chan, first_mask;
+
+   /* Test first: ffs(0) - 1 must never reach the shift below. */
+   if (dst.writemask == 0)
+      return;
+
+   first_chan = ffs(dst.writemask) - 1;
+   first_mask = 1 << first_chan;
+   emit_op3( c, opcode, dst_mask(dst, first_mask), src0, src1, src2 );
+
+   if (dst.writemask != first_mask) {
+      emit_op1(c, TGSI_OPCODE_MOV,
+              dst_mask(dst, ~first_mask),
+              src_scalar(src_reg_from_dst(dst), first_chan));
    }
-   return inst;
 }
 
 
@@ -235,11 +379,11 @@ static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
  * Special instructions for interpolation and other tasks
  */
 
-static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
+static struct brw_fp_src get_pixel_xy( struct brw_wm_compile *c )
 {
-   if (src_is_undef(c->pixel_xy)) {
-      struct prog_dst_register pixel_xy = get_temp(c);
-      struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
+   if (src_is_undef(c->fp_pixel_xy)) {
+      struct brw_fp_dst pixel_xy = get_temp(c);
+      struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
       
       
       /* Emit the out calculations, and hold onto the results.  Use
@@ -247,349 +391,276 @@ static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
        */   
       /* pixel_xy.xy = PIXELXY payload[0];
        */
-      emit_op(c,
-             WM_PIXELXY,
-             dst_mask(pixel_xy, WRITEMASK_XY),
-             0,
-             payload_r0_depth,
-             src_undef(),
-             src_undef());
+      emit_op1(c,
+              WM_PIXELXY,
+              dst_mask(pixel_xy, BRW_WRITEMASK_XY),
+              payload_r0_depth);
 
-      c->pixel_xy = src_reg_from_dst(pixel_xy);
+      c->fp_pixel_xy = src_reg_from_dst(pixel_xy);
    }
 
-   return c->pixel_xy;
+   return c->fp_pixel_xy;
 }
 
-static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
+static struct brw_fp_src get_delta_xy( struct brw_wm_compile *c )
 {
-   if (src_is_undef(c->delta_xy)) {
-      struct prog_dst_register delta_xy = get_temp(c);
-      struct prog_src_register pixel_xy = get_pixel_xy(c);
-      struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
+   if (src_is_undef(c->fp_delta_xy)) {
+      struct brw_fp_dst delta_xy = get_temp(c);
+      struct brw_fp_src pixel_xy = get_pixel_xy(c);
+      struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
       
       /* deltas.xy = DELTAXY pixel_xy, payload[0]
        */
-      emit_op(c,
+      emit_op3(c,
              WM_DELTAXY,
-             dst_mask(delta_xy, WRITEMASK_XY),
-             0,
+             dst_mask(delta_xy, BRW_WRITEMASK_XY),
              pixel_xy, 
              payload_r0_depth,
              src_undef());
       
-      c->delta_xy = src_reg_from_dst(delta_xy);
+      c->fp_delta_xy = src_reg_from_dst(delta_xy);
    }
 
-   return c->delta_xy;
+   return c->fp_delta_xy;
 }
 
-static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
+static struct brw_fp_src get_pixel_w( struct brw_wm_compile *c )
 {
-   if (src_is_undef(c->pixel_w)) {
-      struct prog_dst_register pixel_w = get_temp(c);
-      struct prog_src_register deltas = get_delta_xy(c);
-      struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
+   if (src_is_undef(c->fp_pixel_w)) {
+      struct brw_fp_dst pixel_w = get_temp(c);
+      struct brw_fp_src deltas = get_delta_xy(c);
+
+      /* XXX: assuming position is always first -- valid? 
+       */
+      struct brw_fp_src interp_wpos = src_reg(BRW_FILE_PAYLOAD, 0);
 
       /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
        */
-      emit_op(c,
-             WM_PIXELW,
-             dst_mask(pixel_w, WRITEMASK_W),
-             0,
-             interp_wpos,
-             deltas, 
-             src_undef());
+      emit_op3(c,
+              WM_PIXELW,
+              dst_mask(pixel_w, BRW_WRITEMASK_W),
+              interp_wpos,
+              deltas, 
+              src_undef());
       
 
-      c->pixel_w = src_reg_from_dst(pixel_w);
+      c->fp_pixel_w = src_reg_from_dst(pixel_w);
    }
 
-   return c->pixel_w;
+   return c->fp_pixel_w;
 }
 
+
+/***********************************************************************
+ * Emit INTERP instructions ahead of first use of each attrib.
+ */
+
 static void emit_interp( struct brw_wm_compile *c,
+                        GLuint idx,
                         GLuint semantic,
-                        GLuint semantic_index,
                         GLuint interp_mode )
 {
-   struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
-   struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
-   struct prog_src_register deltas = get_delta_xy(c);
+   struct brw_fp_dst dst = dst_reg(TGSI_FILE_INPUT, idx);
+   struct brw_fp_src interp = src_reg(BRW_FILE_PAYLOAD, idx);
+   struct brw_fp_src deltas = get_delta_xy(c);
 
    /* Need to use PINTERP on attributes which have been
     * multiplied by 1/W in the SF program, and LINTERP on those
     * which have not:
     */
    switch (semantic) {
-   case FRAG_ATTRIB_WPOS:
+   case TGSI_SEMANTIC_POSITION:
       /* Have to treat wpos.xy specially:
        */
-      emit_op(c,
+      emit_op1(c,
              WM_WPOSXY,
-             dst_mask(dst, WRITEMASK_XY),
-             0,
-             get_pixel_xy(c),
-             src_undef(),
-             src_undef());
+             dst_mask(dst, BRW_WRITEMASK_XY),
+             get_pixel_xy(c));
       
-      dst = dst_mask(dst, WRITEMASK_ZW);
-
-      /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
+      /* TGSI_FILE_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
        */
-      emit_op(c,
-             WM_LINTERP,
-             dst,
-             0,
-             interp,
-             deltas,
-             src_undef());
+      emit_op2(c,
+              WM_LINTERP,
+              dst_mask(dst, BRW_WRITEMASK_ZW),
+              interp,
+              deltas);
       break;
 
    case TGSI_SEMANTIC_COLOR:
       if (c->key.flat_shade) {
-        emit_op(c,
+        emit_op1(c,
                 WM_CINTERP,
                 dst,
-                0,
-                interp,
-                src_undef(),
-                src_undef());
+                interp);
+      }
+      else if (interp_mode == TGSI_INTERPOLATE_LINEAR) {
+        emit_op2(c,
+                 WM_LINTERP,
+                 dst,
+                 interp,
+                 deltas);
       }
       else {
-        emit_op(c,
-                translate_interp_mode(interp_mode),
-                dst,
-                0,
-                interp,
-                deltas,
-                src_undef());
+        emit_op3(c,
+                 WM_PINTERP,
+                 dst,
+                 interp,
+                 deltas,
+                 get_pixel_w(c));
       }
+
       break;
-   case FRAG_ATTRIB_FOGC:
+
+   case TGSI_SEMANTIC_FOG:
       /* Interpolate the fog coordinate */
-      emit_op(c,
+      emit_op3(c,
              WM_PINTERP,
-             dst_mask(dst, WRITEMASK_X),
-             0,
+             dst_mask(dst, BRW_WRITEMASK_X),
              interp,
              deltas,
              get_pixel_w(c));
 
-      emit_op(c,
-             TGSI_OPCODE_MOV,
-             dst_mask(dst, WRITEMASK_YZW),
-             0,
-             src_swizzle(interp,
-                         SWIZZLE_ZERO,
-                         SWIZZLE_ZERO,
-                         SWIZZLE_ZERO,
-                         SWIZZLE_ONE),
-             src_undef(),
-             src_undef());
+      emit_op1(c,
+              TGSI_OPCODE_MOV,
+              dst_mask(dst, BRW_WRITEMASK_YZ),
+              src_imm1f(c, 0.0));
+
+      emit_op1(c,
+              TGSI_OPCODE_MOV,
+              dst_mask(dst, BRW_WRITEMASK_W),
+              src_imm1f(c, 1.0));
       break;
 
-   case FRAG_ATTRIB_FACE:
+   case TGSI_SEMANTIC_FACE:
       /* XXX review/test this case */
-      emit_op(c,
-              WM_FRONTFACING,
-              dst_mask(dst, WRITEMASK_X),
-              0,
-              src_undef(),
-              src_undef(),
-              src_undef());
+      emit_op0(c,
+              WM_FRONTFACING,
+              dst_mask(dst, BRW_WRITEMASK_X));
+      
+      emit_op1(c,
+             TGSI_OPCODE_MOV,
+             dst_mask(dst, BRW_WRITEMASK_YZ),
+              src_imm1f(c, 0.0));
+
+      emit_op1(c,
+             TGSI_OPCODE_MOV,
+             dst_mask(dst, BRW_WRITEMASK_W),
+              src_imm1f(c, 1.0));
       break;
 
-   case FRAG_ATTRIB_PNTC:
+   case TGSI_SEMANTIC_PSIZE:
       /* XXX review/test this case */
-      emit_op(c,
-             WM_PINTERP,
-             dst_mask(dst, WRITEMASK_XY),
-             0,
-             interp,
-             deltas,
-             get_pixel_w(c));
+      emit_op3(c,
+              WM_PINTERP,
+              dst_mask(dst, BRW_WRITEMASK_XY),
+              interp,
+              deltas,
+              get_pixel_w(c));
+
+      emit_op1(c,
+             TGSI_OPCODE_MOV,
+             dst_mask(dst, BRW_WRITEMASK_Z),
+             src_imm1f(c, 0.0f));
 
-      emit_op(c,
+      emit_op1(c,
              TGSI_OPCODE_MOV,
-             dst_mask(dst, WRITEMASK_ZW),
-             0,
-             src_swizzle(interp,
-                         SWIZZLE_ZERO,
-                         SWIZZLE_ZERO,
-                         SWIZZLE_ZERO,
-                         SWIZZLE_ONE),
-             src_undef(),
-             src_undef());
+             dst_mask(dst, BRW_WRITEMASK_W),
+             src_imm1f(c, 1.0f));
       break;
 
-   default:
-      emit_op(c,
-             translate_interp_mode(interp_mode),
-             dst,
-             0,
-             interp,
-             deltas,
-             get_pixel_w(c));
-      break;
-   }
-}
+   default: 
+      switch (interp_mode) {
+      case TGSI_INTERPOLATE_CONSTANT:
+        emit_op1(c,
+                 WM_CINTERP,
+                 dst,
+                 interp);
+        break;
 
-/***********************************************************************
- * Hacks to extend the program parameter and constant lists.
- */
+      case TGSI_INTERPOLATE_LINEAR:
+        emit_op2(c,
+                 WM_LINTERP,
+                 dst,
+                 interp,
+                 deltas);
+        break;
 
-/* Add the fog parameters to the parameter list of the original
- * program, rather than creating a new list.  Doesn't really do any
- * harm and it's not as if the parameter handling isn't a big hack
- * anyway.
- */
-static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c, 
-                                                     GLint s0,
-                                                     GLint s1,
-                                                     GLint s2,
-                                                     GLint s3,
-                                                     GLint s4)
-{
-   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
-   gl_state_index tokens[STATE_LENGTH];
-   GLuint idx;
-   tokens[0] = s0;
-   tokens[1] = s1;
-   tokens[2] = s2;
-   tokens[3] = s3;
-   tokens[4] = s4;
-   
-   for (idx = 0; idx < paramList->NumParameters; idx++) {
-      if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
-         memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
-        return src_reg(PROGRAM_STATE_VAR, idx);
+      case TGSI_INTERPOLATE_PERSPECTIVE:
+        emit_op3(c,
+                 WM_PINTERP,
+                 dst,
+                 interp,
+                 deltas,
+                 get_pixel_w(c));
+        break;
+      }
+      break;
    }
-
-   idx = _mesa_add_state_reference( paramList, tokens );
-
-   return src_reg(PROGRAM_STATE_VAR, idx);
 }
 
 
-static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, 
-                                                    GLfloat s0,
-                                                    GLfloat s1,
-                                                    GLfloat s2,
-                                                    GLfloat s3)
-{
-   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
-   GLfloat values[4];
-   GLuint idx;
-   GLuint swizzle;
-
-   values[0] = s0;
-   values[1] = s1;
-   values[2] = s2;
-   values[3] = s3;
-
-   /* Have to search, otherwise multiple compilations will each grow
-    * the parameter list.
-    */
-   for (idx = 0; idx < paramList->NumParameters; idx++) {
-      if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
-         memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
-
-        /* XXX: this mimics the mesa bug which puts all constants and
-         * parameters into the "PROGRAM_STATE_VAR" category:
-         */
-        return src_reg(PROGRAM_STATE_VAR, idx);
-   }
-   
-   idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
-   assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
-   return src_reg(PROGRAM_STATE_VAR, idx);
-}
-
-
-
 /***********************************************************************
  * Expand various instructions here to simpler forms.  
  */
 static void precalc_dst( struct brw_wm_compile *c,
-                              const struct prog_instruction *inst )
+                        struct brw_fp_dst dst,
+                        struct brw_fp_src src0,
+                        struct brw_fp_src src1 )
 {
-   struct prog_src_register src0 = inst->SrcReg[0];
-   struct prog_src_register src1 = inst->SrcReg[1];
-   struct prog_dst_register dst = inst->DstReg;
-   
-   if (dst.WriteMask & WRITEMASK_Y) {      
+   if (dst.writemask & BRW_WRITEMASK_Y) {      
       /* dst.y = mul src0.y, src1.y
        */
-      emit_op(c,
-             TGSI_OPCODE_MUL,
-             dst_mask(dst, WRITEMASK_Y),
-             inst->SaturateMode,
-             src0,
-             src1,
-             src_undef());
+      emit_op2(c,
+              TGSI_OPCODE_MUL,
+              dst_mask(dst, BRW_WRITEMASK_Y),
+              src0,
+              src1);
    }
 
-   if (dst.WriteMask & WRITEMASK_XZ) {
-      struct prog_instruction *swz;
-      GLuint z = GET_SWZ(src0.Swizzle, Z);
+   if (dst.writemask & BRW_WRITEMASK_XZ) {
+      /* dst.z = mov src0.zzzz
+       */
+      emit_op1(c,
+             TGSI_OPCODE_MOV,
+             dst_mask(dst, BRW_WRITEMASK_Z),
+             src_scalar(src0, Z));
 
-      /* dst.xz = swz src0.1zzz
+      /* dst.x = imm1f(1.0)
        */
-      swz = emit_op(c,
-                   TGSI_OPCODE_MOV,
-                   dst_mask(dst, WRITEMASK_XZ),
-                   inst->SaturateMode,
-                   src_swizzle(src0, SWIZZLE_ONE, z, z, z),
-                   src_undef(),
-                   src_undef());
-      /* Avoid letting negation flag of src0 affect our 1 constant. */
-      swz->SrcReg[0].Negate &= ~NEGATE_X;
+      emit_op1(c,
+             TGSI_OPCODE_MOV,
+             dst_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0),
+             src_imm1f(c, 1.0));
    }
-   if (dst.WriteMask & WRITEMASK_W) {
+   if (dst.writemask & BRW_WRITEMASK_W) {
       /* dst.w = mov src1.w
        */
-      emit_op(c,
-             TGSI_OPCODE_MOV,
-             dst_mask(dst, WRITEMASK_W),
-             inst->SaturateMode,
-             src1,
-             src_undef(),
-             src_undef());
+      emit_op1(c,
+              TGSI_OPCODE_MOV,
+              dst_mask(dst, BRW_WRITEMASK_W),
+              src1);
    }
 }
 
 
 static void precalc_lit( struct brw_wm_compile *c,
-                        const struct prog_instruction *inst )
+                        struct brw_fp_dst dst,
+                        struct brw_fp_src src0 )
 {
-   struct prog_src_register src0 = inst->SrcReg[0];
-   struct prog_dst_register dst = inst->DstReg;
-   
-   if (dst.WriteMask & WRITEMASK_XW) {
-      struct prog_instruction *swz;
-
-      /* dst.xw = swz src0.1111
+   if (dst.writemask & BRW_WRITEMASK_XW) {
+      /* dst.xw = imm(1.0f)
        */
-      swz = emit_op(c,
-                   TGSI_OPCODE_MOV,
-                   dst_mask(dst, WRITEMASK_XW),
-                   0,
-                   src_swizzle1(src0, SWIZZLE_ONE),
-                   src_undef(),
-                   src_undef());
-      /* Avoid letting the negation flag of src0 affect our 1 constant. */
-      swz->SrcReg[0].Negate = NEGATE_NONE;
+      emit_op1(c,
+              TGSI_OPCODE_MOV,
+              dst_saturate(dst_mask(dst, BRW_WRITEMASK_XW), 0),
+              src_imm1f(c, 1.0f));
    }
 
-   if (dst.WriteMask & WRITEMASK_YZ) {
-      emit_op(c,
-             TGSI_OPCODE_LIT,
-             dst_mask(dst, WRITEMASK_YZ),
-             inst->SaturateMode,
-             src0,
-             src_undef(),
-             src_undef());
+   if (dst.writemask & BRW_WRITEMASK_YZ) {
+      emit_op1(c,
+              TGSI_OPCODE_LIT,
+              dst_mask(dst, BRW_WRITEMASK_YZ),
+              src0);
    }
 }
 
@@ -601,99 +672,65 @@ static void precalc_lit( struct brw_wm_compile *c,
  * instruction itself.
  */
 static void precalc_tex( struct brw_wm_compile *c,
-                        const struct prog_instruction *inst )
+                        struct brw_fp_dst dst,
+                        unsigned target,
+                        unsigned unit,
+                        struct brw_fp_src src0,
+                        struct brw_fp_src sampler )
 {
-   struct prog_src_register coord;
-   struct prog_dst_register tmpcoord;
-   const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
+   struct brw_fp_src coord;
+   struct brw_fp_dst tmp = dst_undef();
 
    assert(unit < BRW_MAX_TEX_UNIT);
 
-   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
-       struct prog_instruction *out;
-       struct prog_dst_register tmp0 = get_temp(c);
-       struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
-       struct prog_dst_register tmp1 = get_temp(c);
-       struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
-       struct prog_src_register src0 = inst->SrcReg[0];
-
-       /* find longest component of coord vector and normalize it */
-       tmpcoord = get_temp(c);
-       coord = src_reg_from_dst(tmpcoord);
-
-       /* tmpcoord = src0 (i.e.: coord = src0) */
-       out = emit_op(c, TGSI_OPCODE_MOV,
-                     tmpcoord,
-                     0,
-                     src0,
-                     src_undef(),
-                     src_undef());
-       out->SrcReg[0].Negate = NEGATE_NONE;
-       out->SrcReg[0].Abs = 1;
-
-       /* tmp0 = MAX(coord.X, coord.Y) */
-       emit_op(c, TGSI_OPCODE_MAX,
-               tmp0,
-               0,
-               src_swizzle1(coord, X),
-               src_swizzle1(coord, Y),
-               src_undef());
-
-       /* tmp1 = MAX(tmp0, coord.Z) */
-       emit_op(c, TGSI_OPCODE_MAX,
-               tmp1,
-               0,
-               tmp0src,
-               src_swizzle1(coord, Z),
-               src_undef());
-
-       /* tmp0 = 1 / tmp1 */
-       emit_op(c, TGSI_OPCODE_RCP,
-               dst_mask(tmp0, WRITEMASK_X),
-               0,
-               tmp1src,
-               src_undef(),
-               src_undef());
-
-       /* tmpCoord = src0 * tmp0 */
-       emit_op(c, TGSI_OPCODE_MUL,
-               tmpcoord,
-               0,
-               src0,
-               src_swizzle1(tmp0src, SWIZZLE_X),
-               src_undef());
-
-       release_temp(c, tmp0);
-       release_temp(c, tmp1);
+   /* Cubemap: find longest component of coord vector and normalize
+    * it.
+    */
+   if (target == TGSI_TEXTURE_CUBE) {
+      struct brw_fp_src tmpsrc;
+
+      tmp = get_temp(c);
+      tmpsrc = src_reg_from_dst(tmp);
+
+      /* tmp = abs(src0) */
+      emit_op1(c, 
+              TGSI_OPCODE_MOV,
+              tmp,
+              src_abs(src0));
+
+      /* tmp.X = MAX(tmp.X, tmp.Y) */
+      emit_op2(c, TGSI_OPCODE_MAX,
+              dst_mask(tmp, BRW_WRITEMASK_X),
+              src_scalar(tmpsrc, X),
+              src_scalar(tmpsrc, Y));
+
+      /* tmp.X = MAX(tmp.X, tmp.Z) */
+      emit_op2(c, TGSI_OPCODE_MAX,
+              dst_mask(tmp, BRW_WRITEMASK_X),
+              tmpsrc,
+              src_scalar(tmpsrc, Z));
+
+      /* tmp.X = 1 / tmp.X */
+      emit_op1(c, TGSI_OPCODE_RCP,
+             dst_mask(tmp, BRW_WRITEMASK_X),
+             tmpsrc);
+
+      /* tmp = src0 * tmp.xxxx */
+      emit_op2(c, TGSI_OPCODE_MUL,
+              tmp,
+              src0,
+              src_scalar(tmpsrc, X));
+
+      coord = tmpsrc;
    }
-   else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
-      struct prog_src_register scale = 
-        search_or_add_param5( c, 
-                              STATE_INTERNAL, 
-                              STATE_TEXRECT_SCALE,
-                              unit,
-                              0,0 );
-
-      tmpcoord = get_temp(c);
-
-      /* coord.xy   = MUL inst->SrcReg[0], { 1/width, 1/height }
+   else if (target == TGSI_TEXTURE_RECT ||
+           target == TGSI_TEXTURE_SHADOWRECT) {
+      /* XXX: need a mechanism for internally generated constants.
        */
-      emit_op(c,
-             TGSI_OPCODE_MUL,
-             tmpcoord,
-             0,
-             inst->SrcReg[0],
-             src_swizzle(scale,
-                         SWIZZLE_X,
-                         SWIZZLE_Y,
-                         SWIZZLE_ONE,
-                         SWIZZLE_ONE),
-             src_undef());
-
-      coord = src_reg_from_dst(tmpcoord);
+      coord = src0;
    }
    else {
-      coord = inst->SrcReg[0];
+      coord = src0;
    }
 
    /* Need to emit YUV texture conversions by hand.  Probably need to
@@ -704,58 +741,36 @@ static void precalc_tex( struct brw_wm_compile *c,
    if (c->key.yuvtex_mask & (1 << unit)) {
       /* convert ycbcr to RGBA */
       GLboolean  swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
-
-      /* 
-        CONST C0 = { -.5, -.0625,  -.5, 1.164 }
-        CONST C1 = { 1.596, -0.813, 2.018, -.391 }
-        UYV     = TEX ...
-        UYV.xyz = ADD UYV,     C0
-        UYV.y   = MUL UYV.y,   C0.w
-        if (UV swaped)
-           RGB.xyz = MAD UYV.zzx, C1,   UYV.y
-        else
-           RGB.xyz = MAD UYV.xxz, C1,   UYV.y 
-        RGB.y   = MAD UYV.z,   C1.w, RGB.y
-      */
-      struct prog_dst_register dst = inst->DstReg;
-      struct prog_dst_register tmp = get_temp(c);
-      struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
-      struct prog_src_register C0 = search_or_add_const4f( c,  -.5, -.0625, -.5, 1.164 );
-      struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
+      struct brw_fp_dst tmp = get_temp(c);
+      struct brw_fp_src tmpsrc = src_reg_from_dst(tmp);
+      struct brw_fp_src C0 = src_imm4f( c,  -.5, -.0625, -.5, 1.164 );
+      struct brw_fp_src C1 = src_imm4f( c, 1.596, -0.813, 2.018, -.391 );
      
       /* tmp     = TEX ...
        */
       emit_tex_op(c, 
                   TGSI_OPCODE_TEX,
-                  tmp,
-                  inst->SaturateMode,
+                  dst_saturate(tmp, dst.saturate),
                   unit,
-                  inst->TexSrcTarget,
-                  inst->TexShadow,
+                  target,
+                  sampler.index,
                   coord,
                   src_undef(),
                   src_undef());
 
       /* tmp.xyz =  ADD TMP, C0
        */
-      emit_op(c,
-             TGSI_OPCODE_ADD,
-             dst_mask(tmp, WRITEMASK_XYZ),
-             0,
-             tmpsrc,
-             C0,
-             src_undef());
+      emit_op2(c, TGSI_OPCODE_ADD,
+              dst_mask(tmp, BRW_WRITEMASK_XYZ),
+              tmpsrc,
+              C0);
 
       /* YUV.y   = MUL YUV.y, C0.w
        */
-
-      emit_op(c,
-             TGSI_OPCODE_MUL,
-             dst_mask(tmp, WRITEMASK_Y),
-             0,
-             tmpsrc,
-             src_swizzle1(C0, W),
-             src_undef());
+      emit_op2(c, TGSI_OPCODE_MUL,
+              dst_mask(tmp, BRW_WRITEMASK_Y),
+              tmpsrc,
+              src_scalar(C0, W));
 
       /* 
        * if (UV swaped)
@@ -764,23 +779,22 @@ static void precalc_tex( struct brw_wm_compile *c,
        *     RGB.xyz = MAD YUV.xxz, C1, YUV.y
        */
 
-      emit_op(c,
-             TGSI_OPCODE_MAD,
-             dst_mask(dst, WRITEMASK_XYZ),
-             0,
-             swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
-             C1,
-             src_swizzle1(tmpsrc, Y));
+      emit_op3(c, TGSI_OPCODE_MAD,
+              dst_mask(dst, BRW_WRITEMASK_XYZ),
+              ( swap_uv ? 
+                src_swizzle(tmpsrc, Z,Z,X,X) : 
+                src_swizzle(tmpsrc, X,X,Z,Z)),
+              C1,
+              src_scalar(tmpsrc, Y));
 
       /*  RGB.y   = MAD YUV.z, C1.w, RGB.y
        */
-      emit_op(c,
-             TGSI_OPCODE_MAD,
-             dst_mask(dst, WRITEMASK_Y),
-             0,
-             src_swizzle1(tmpsrc, Z),
-             src_swizzle1(C1, W),
-             src_swizzle1(src_reg_from_dst(dst), Y));
+      emit_op3(c,
+              TGSI_OPCODE_MAD,
+              dst_mask(dst, BRW_WRITEMASK_Y),
+              src_scalar(tmpsrc, Z),
+              src_scalar(C1, W),
+              src_scalar(src_reg_from_dst(dst), Y));
 
       release_temp(c, tmp);
    }
@@ -788,31 +802,23 @@ static void precalc_tex( struct brw_wm_compile *c,
       /* ordinary RGBA tex instruction */
       emit_tex_op(c, 
                   TGSI_OPCODE_TEX,
-                  inst->DstReg,
-                  inst->SaturateMode,
+                  dst,
                   unit,
-                  inst->TexSrcTarget,
-                  inst->TexShadow,
+                  target,
+                  sampler.index,
                   coord,
                   src_undef(),
                   src_undef());
    }
 
-   /* For GL_EXT_texture_swizzle: */
-   if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
-      /* swizzle the result of the TEX instruction */
-      struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
-      emit_op(c, TGSI_OPCODE_MOV,
-              inst->DstReg,
-              SATURATE_OFF, /* saturate already done above */
-              src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
-              src_undef(),
-              src_undef());
-   }
+   /* XXX: add GL_EXT_texture_swizzle support to gallium -- by
+    * generating shader variants in the mesa state tracker.
+    */
 
-   if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
-       (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
-      release_temp(c, tmpcoord);
+   /* Release this temp if we ended up allocating it:
+    */
+   if (!dst_is_undef(tmp))
+      release_temp(c, tmp);
 }
 
 
@@ -820,13 +826,9 @@ static void precalc_tex( struct brw_wm_compile *c,
  * Check if the given TXP instruction really needs the divide-by-W step.
  */
 static GLboolean projtex( struct brw_wm_compile *c,
-                         const struct prog_instruction *inst )
+                         unsigned target, 
+                         struct brw_fp_src src )
 {
-   const struct prog_src_register src = inst->SrcReg[0];
-   GLboolean retVal;
-
-   assert(inst->Opcode == TGSI_OPCODE_TXP);
-
    /* Only try to detect the simplest cases.  Could detect (later)
     * cases where we are trying to emit code like RCP {1.0}, MUL x,
     * {1.0}, and so on.
@@ -834,16 +836,15 @@ static GLboolean projtex( struct brw_wm_compile *c,
     * More complex cases than this typically only arise from
     * user-provided fragment programs anyway:
     */
-   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
-      retVal = GL_FALSE;  /* ut2004 gun rendering !?! */
-   else if (src.File == PROGRAM_INPUT && 
-           GET_SWZ(src.Swizzle, W) == W &&
-            (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
-      retVal = GL_FALSE;
-   else
-      retVal = GL_TRUE;
+   if (target == TGSI_TEXTURE_CUBE)
+      return GL_FALSE;  /* ut2004 gun rendering !?! */
+   
+   if (src.file == TGSI_FILE_INPUT && 
+       BRW_GET_SWZ(src.swizzle, W) == W &&
+       c->fp->info.input_interpolate[src.index] != TGSI_INTERPOLATE_PERSPECTIVE)
+      return GL_FALSE;
 
-   return retVal;
+   return GL_TRUE;
 }
 
 
@@ -851,262 +852,372 @@ static GLboolean projtex( struct brw_wm_compile *c,
  * Emit code for TXP.
  */
 static void precalc_txp( struct brw_wm_compile *c,
-                              const struct prog_instruction *inst )
+                        struct brw_fp_dst dst,
+                        unsigned target,
+                        unsigned unit,
+                        struct brw_fp_src src0,
+                         struct brw_fp_src sampler )
 {
-   struct prog_src_register src0 = inst->SrcReg[0];
-
-   if (projtex(c, inst)) {
-      struct prog_dst_register tmp = get_temp(c);
-      struct prog_instruction tmp_inst;
+   if (projtex(c, target, src0)) {
+      struct brw_fp_dst tmp = get_temp(c);
 
       /* tmp0.w = RCP inst.arg[0][3]
        */
-      emit_op(c,
+      emit_op1(c,
              TGSI_OPCODE_RCP,
-             dst_mask(tmp, WRITEMASK_W),
-             0,
-             src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
-             src_undef(),
-             src_undef());
+             dst_mask(tmp, BRW_WRITEMASK_W),
+             src_scalar(src0, W));
 
       /* tmp0.xyz =  MUL inst.arg[0], tmp0.wwww
        */
-      emit_op(c,
-             TGSI_OPCODE_MUL,
-             dst_mask(tmp, WRITEMASK_XYZ),
-             0,
-             src0,
-             src_swizzle1(src_reg_from_dst(tmp), W),
-             src_undef());
+      emit_op2(c,
+              TGSI_OPCODE_MUL,
+              dst_mask(tmp, BRW_WRITEMASK_XYZ),
+              src0,
+              src_scalar(src_reg_from_dst(tmp), W));
 
-      /* dst = precalc(TEX tmp0)
+      /* dst = TEX tmp0
        */
-      tmp_inst = *inst;
-      tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
-      precalc_tex(c, &tmp_inst);
+      precalc_tex(c, 
+                 dst,
+                 target,
+                 unit,
+                 src_reg_from_dst(tmp),
+                  sampler );
 
       release_temp(c, tmp);
    }
    else
    {
-      /* dst = precalc(TEX src0)
+      /* dst = TEX src0
        */
-      precalc_tex(c, inst);
+      precalc_tex(c, dst, target, unit, src0, sampler);
    }
 }
 
 
+/* Look up the shader output whose semantic name and semantic index
+ * match (semantic, index) in c->fp->info, and return it as a source
+ * register in TGSI_FILE_OUTPUT.
+ *
+ * XXX: note this returns a src_reg.
+ *
+ * When no output matches, output 0 is returned as a fallback, so the
+ * caller cannot tell "not found" apart from a genuine match on
+ * output 0.
+ */
+static struct brw_fp_src
+find_output_by_semantic( struct brw_wm_compile *c,
+                        unsigned semantic,
+                        unsigned index )
+{
+   const struct tgsi_shader_info *info = &c->fp->info;
+   unsigned i;
+
+   for (i = 0; i < info->num_outputs; i++)
+      if (info->output_semantic_name[i] == semantic &&
+         info->output_semantic_index[i] == index)
+        return src_reg( TGSI_FILE_OUTPUT, i );
+
+   /* If not found, the intent is to return some arbitrary immediate
+    * value (see the commented-out src_imm1f below); for now output 0
+    * is returned instead.
+    *
+    * XXX: this is a good idea but immediates end up generating extra
+    * curbe entries atm, as they would have in the original driver.
+    */
+   return src_reg( TGSI_FILE_OUTPUT, 0 ); /* src_imm1f(c, 1.0); */
+}
+
 
 static void emit_fb_write( struct brw_wm_compile *c )
 {
-   struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
-   struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
-   struct prog_src_register outcolor;
+   struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
+   struct brw_fp_src outdepth = find_output_by_semantic(c, TGSI_SEMANTIC_POSITION, 0);
    GLuint i;
 
-   struct prog_instruction *inst, *last_inst;
-   struct brw_context *brw = c->func.brw;
-
-   /* The inst->Aux field is used for FB write target and the EOT marker */
-
-   if (brw->state.nr_color_regions > 1) {
-      for (i = 0 ; i < brw->state.nr_color_regions; i++) {
-         outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
-         last_inst = inst = emit_op(c,
-                                    WM_FB_WRITE, dst_mask(dst_undef(),0), 0,
-                                    outcolor, payload_r0_depth, outdepth);
-         inst->Aux = (i<<1);
-         if (c->fp_fragcolor_emitted) {
-            outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
-            last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
-                                       0, outcolor, payload_r0_depth, outdepth);
-            inst->Aux = (i<<1);
-         }
-      }
-      last_inst->Aux |= 1; //eot
-   }
-   else {
-      /* if gl_FragData[0] is written, use it, else use gl_FragColor */
-      if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
-         outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
-      else 
-         outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
-
-      inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
-                     0, outcolor, payload_r0_depth, outdepth);
-      inst->Aux = 1|(0<<1);
-   }
-}
 
+   outdepth = src_scalar(outdepth, Z);
 
+   for (i = 0 ; i < c->key.nr_cbufs; i++) {
+      struct brw_fp_src outcolor;
+      
+      outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i);
 
+      /* Use emit_tex_op so that we can specify the inst->target
+       * field, which is abused to contain the FB write target and the
+       * EOT marker
+       */
+      emit_tex_op(c, WM_FB_WRITE,
+                 dst_undef(),
+                 (i == c->key.nr_cbufs - 1), /* EOT */
+                 i,
+                  0,            /* no sampler */
+                 outcolor,
+                 payload_r0_depth,
+                 outdepth);
+   }
+}
 
-/***********************************************************************
- * Emit INTERP instructions ahead of first use of each attrib.
- */
 
-static void validate_src_regs( struct brw_wm_compile *c,
-                              const struct prog_instruction *inst )
+static struct brw_fp_dst translate_dst( struct brw_wm_compile *c,
+                                       const struct tgsi_full_dst_register *dst,
+                                       unsigned saturate )
 {
-   GLuint nr_args = brw_wm_nr_args( inst->Opcode );
-   GLuint i;
+   struct brw_fp_dst out;
 
-   for (i = 0; i < nr_args; i++) {
-      if (inst->SrcReg[i].File == PROGRAM_INPUT) {
-        GLuint idx = inst->SrcReg[i].Index;
-        if (!(c->fp_interp_emitted & (1<<idx))) {
-           emit_interp(c, idx);
-           c->fp_interp_emitted |= 1<<idx;
-        }
-      }
+   out.file = dst->Register.File;
+   out.index = dst->Register.Index;
+   out.writemask = dst->Register.WriteMask;
+   out.indirect = dst->Register.Indirect;
+   out.saturate = (saturate == TGSI_SAT_ZERO_ONE);
+   
+   if (out.indirect) {
+      assert(dst->Indirect.File == TGSI_FILE_ADDRESS);
+      assert(dst->Indirect.Index == 0);
    }
+   
+   return out;
 }
-        
-static void validate_dst_regs( struct brw_wm_compile *c,
-                              const struct prog_instruction *inst )
+
+
+static struct brw_fp_src translate_src( struct brw_wm_compile *c,
+                                       const struct tgsi_full_src_register *src )
 {
-   if (inst->DstReg.File == PROGRAM_OUTPUT) {
-      GLuint idx = inst->DstReg.Index;
-      if (idx == FRAG_RESULT_COLOR)
-         c->fp_fragcolor_emitted = 1;
+   struct brw_fp_src out;
+
+   out.file = src->Register.File;
+   out.index = src->Register.Index;
+   out.indirect = src->Register.Indirect;
+
+   out.swizzle = ((src->Register.SwizzleX << 0) |
+                 (src->Register.SwizzleY << 2) |
+                 (src->Register.SwizzleZ << 4) |
+                 (src->Register.SwizzleW << 6));
+   
+   switch (tgsi_util_get_full_src_register_sign_mode( src, 0 )) {
+   case TGSI_UTIL_SIGN_CLEAR:
+      out.abs = 1;
+      out.negate = 0;
+      break;
+
+   case TGSI_UTIL_SIGN_SET:
+      out.abs = 1;
+      out.negate = 1;
+      break;
+
+   case TGSI_UTIL_SIGN_TOGGLE:
+      out.abs = 0;
+      out.negate = 1;
+      break;
+
+   case TGSI_UTIL_SIGN_KEEP:
+   default:
+      out.abs = 0;
+      out.negate = 0;
+      break;
+   }
+
+   if (out.indirect) {
+      assert(src->Indirect.File == TGSI_FILE_ADDRESS);
+      assert(src->Indirect.Index == 0);
    }
+   
+   return out;
 }
 
-static void print_insns( const struct prog_instruction *insn,
-                        GLuint nr )
+
+
+/**
+ * Translate one TGSI instruction into the internal fragment-program
+ * representation.
+ *
+ * The destination and sources are converted with translate_dst() /
+ * translate_src().  A handful of opcodes are lowered here (ABS -> MOV
+ * with abs, SUB -> ADD with negated src1, DST/LIT/TEX/TXP via the
+ * precalc_* helpers, END -> framebuffer write); everything else is
+ * forwarded unchanged, going through emit_scalar_insn() when the
+ * shader has no flow control and the opcode produces a scalar result.
+ *
+ * Source slots the instruction does not supply are set to src_undef()
+ * so that the three-source emit paths never read indeterminate data.
+ */
+static void emit_insn( struct brw_wm_compile *c,
+                      const struct tgsi_full_instruction *inst )
 {
-   GLuint i;
-   for (i = 0; i < nr; i++, insn++) {
-      _mesa_printf("%3d: ", i);
-      if (insn->Opcode < MAX_OPCODE)
-        _mesa_print_instruction(insn);
-      else if (insn->Opcode < MAX_WM_OPCODE) {
-        GLuint idx = insn->Opcode - MAX_OPCODE;
-
-        _mesa_print_alu_instruction(insn,
-                                    wm_opcode_strings[idx],
-                                    3);
-      }
-      else 
-        _mesa_printf("965 Opcode %d\n", insn->Opcode);
+   unsigned opcode = inst->Instruction.Opcode;
+   struct brw_fp_dst dst;
+   struct brw_fp_src src[3];
+   const unsigned max_src = sizeof src / sizeof src[0];
+   unsigned nr_src = inst->Instruction.NumSrcRegs;
+   unsigned i;
+
+   /* Only three sources can be represented here.  Never write past
+    * the end of src[]; any additional TGSI sources are ignored.
+    */
+   assert(nr_src <= max_src);
+   if (nr_src > max_src)
+      nr_src = max_src;
+
+   dst = translate_dst( c, &inst->Dst[0],
+                       inst->Instruction.Saturate );
+
+   /* Fill every slot: real sources first, src_undef() for the rest,
+    * because the default case below passes all three on regardless of
+    * how many the instruction actually has.
+    */
+   for (i = 0; i < max_src; i++) {
+      if (i < nr_src)
+        src[i] = translate_src( c, &inst->Src[i] );
+      else
+        src[i] = src_undef();
+   }
+   
+   switch (opcode) {
+   case TGSI_OPCODE_ABS:
+      emit_op1(c, TGSI_OPCODE_MOV,
+              dst, 
+              src_abs(src[0]));
+      break;
+
+   case TGSI_OPCODE_SUB: 
+      emit_op2(c, TGSI_OPCODE_ADD,
+              dst,
+              src[0],
+              src_negate(src[1]));
+      break;
+
+   case TGSI_OPCODE_SCS: 
+      /* Restrict the write to the X and Y channels. */
+      emit_op1(c, TGSI_OPCODE_SCS,
+              dst_mask(dst, BRW_WRITEMASK_XY),
+              src[0]);
+      break;
+        
+   case TGSI_OPCODE_DST:
+      precalc_dst(c, dst, src[0], src[1]);
+      break;
+
+   case TGSI_OPCODE_LIT:
+      precalc_lit(c, dst, src[0]);
+      break;
+
+   case TGSI_OPCODE_TEX:
+      precalc_tex(c, dst,
+                 inst->Texture.Texture,
+                 src[1].index, /* use sampler unit for tex idx */
+                 src[0],       /* coord */
+                  src[1]);      /* sampler */
+      break;
+
+   case TGSI_OPCODE_TXP:
+      precalc_txp(c, dst,
+                 inst->Texture.Texture,
+                 src[1].index, /* use sampler unit for tex idx */
+                 src[0],       /* coord */
+                  src[1]);      /* sampler */
+      break;
+
+   case TGSI_OPCODE_TXB:
+      /* XXX: TXB not done -- currently emitted as a plain TEX.
+       */
+      precalc_tex(c, dst,
+                 inst->Texture.Texture,
+                 src[1].index, /* use sampler unit for tex idx*/
+                 src[0],
+                  src[1]);
+      break;
+
+   case TGSI_OPCODE_XPD: 
+      /* Restrict the write to the X, Y and Z channels. */
+      emit_op2(c, TGSI_OPCODE_XPD,
+              dst_mask(dst, BRW_WRITEMASK_XYZ),
+              src[0], 
+              src[1]);
+      break;
+
+   case TGSI_OPCODE_KIL: 
+      /* KIL is emitted with an undefined destination and an empty
+       * writemask.
+       */
+      emit_op1(c, TGSI_OPCODE_KIL,
+              dst_mask(dst_undef(), 0),
+              src[0]);
+      break;
+
+   case TGSI_OPCODE_END:
+      emit_fb_write(c);
+      break;
+   default:
+      if (!c->key.has_flow_control &&
+         brw_wm_is_scalar_result(opcode))
+        emit_scalar_insn(c, opcode, dst, src[0], src[1], src[2]);
+      else
+        emit_op3(c, opcode, dst, src[0], src[1], src[2]);
+      break;
    }
 }
 
-
 /**
  * Initial pass for fragment program code generation.
  * This function is used by both the GLSL and non-GLSL paths.
  */
-void brw_wm_pass_fp( struct brw_wm_compile *c )
+int brw_wm_pass_fp( struct brw_wm_compile *c )
 {
-   struct brw_fragment_program *fp = c->fp;
-   GLuint insn;
+   struct brw_fragment_shader *fs = c->fp;
+   struct tgsi_parse_context parse;
+   struct tgsi_full_instruction *inst;
+   struct tgsi_full_declaration *decl;
+   const float *imm;
+   GLuint size;
+   GLuint i;
 
-   if (INTEL_DEBUG & DEBUG_WM) {
-      _mesa_printf("pre-fp:\n");
-      _mesa_print_program(&fp->program.Base); 
-      _mesa_printf("\n");
+   if (BRW_DEBUG & DEBUG_WM) {
+      debug_printf("pre-fp:\n");
+      tgsi_dump(fs->tokens, 0); 
    }
 
-   c->pixel_xy = src_undef();
-   c->delta_xy = src_undef();
-   c->pixel_w = src_undef();
+   c->fp_pixel_xy = src_undef();
+   c->fp_delta_xy = src_undef();
+   c->fp_pixel_w = src_undef();
    c->nr_fp_insns = 0;
-   c->fp->tex_units_used = 0x0;
+   c->nr_immediates = 0;
 
-   /* Emit preamble instructions.  This is where special instructions such as
-    * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
-    * compute shader inputs from varying vars.
-    */
-   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
-      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
-      validate_src_regs(c, inst);
-      validate_dst_regs(c, inst);
-   }
 
    /* Loop over all instructions doing assorted simplifications and
     * transformations.
     */
-   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
-      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
-      struct prog_instruction *out;
-
-      /* Check for INPUT values, emit INTERP instructions where
-       * necessary:
-       */
-
-      switch (inst->Opcode) {
-      case TGSI_OPCODE_ABS:
-        out = emit_insn(c, inst);
-        out->Opcode = TGSI_OPCODE_MOV;
-        out->SrcReg[0].Negate = NEGATE_NONE;
-        out->SrcReg[0].Abs = 1;
-        break;
-
-      case TGSI_OPCODE_SUB: 
-        out = emit_insn(c, inst);
-        out->Opcode = TGSI_OPCODE_ADD;
-        out->SrcReg[1].Negate ^= NEGATE_XYZW;
-        break;
-
-      case TGSI_OPCODE_SCS: 
-        out = emit_insn(c, inst);
-        /* This should probably be done in the parser. 
+   tgsi_parse_init( &parse, fs->tokens );
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+        /* Turn intput declarations into special WM_* instructions.
+         *
+         * XXX: For non-branching shaders, consider deferring variable
+         * initialization as late as possible to minimize register
+         * usage.  This is how the original BRW driver worked.
+         *
+         * In a branching shader, must preamble instructions at decl
+         * time, as instruction order in the shader does not
+         * correspond to the order instructions are executed in the
+         * wild.
+         *
+         * This is where special instructions such as WM_CINTERP,
+         * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
+         * compute shader inputs from the payload registers and pixel
+         * position.
          */
-        out->DstReg.WriteMask &= WRITEMASK_XY;
-        break;
+         decl = &parse.FullToken.FullDeclaration;
+         if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+            unsigned first, last, mask;
+            unsigned attrib;
+
+            first = decl->Range.First;
+            last = decl->Range.Last;
+            mask = decl->Declaration.UsageMask;
+
+            for (attrib = first; attrib <= last; attrib++) {
+              emit_interp(c, 
+                          attrib, 
+                          decl->Semantic.Name,
+                          decl->Declaration.Interpolate );
+            }
+         }
         
-      case TGSI_OPCODE_DST:
-        precalc_dst(c, inst);
-        break;
-
-      case TGSI_OPCODE_LIT:
-        precalc_lit(c, inst);
-        break;
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+        /* Unlike VS programs we can probably manage fine encoding
+         * immediate values directly into the emitted EU
+         * instructions, as we probably only need to reference one
+         * float value per instruction.  Just save the data for now
+         * and use directly later.
+         */
+        i = c->nr_immediates++;
+        imm = &parse.FullToken.FullImmediate.u[i].Float;
+        size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
 
-      case TGSI_OPCODE_TEX:
-        precalc_tex(c, inst);
-        break;
+        if (c->nr_immediates >= BRW_WM_MAX_CONST)
+           return PIPE_ERROR_OUT_OF_MEMORY;
 
-      case TGSI_OPCODE_TXP:
-        precalc_txp(c, inst);
-        break;
+        for (i = 0; i < size; i++)
+           c->immediate[c->nr_immediates].v[i] = imm[i];
 
-      case TGSI_OPCODE_TXB:
-        out = emit_insn(c, inst);
-        out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
-         assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
-        break;
+        for (; i < 4; i++)
+           c->immediate[c->nr_immediates].v[i] = 0.0;
 
-      case TGSI_OPCODE_XPD: 
-        out = emit_insn(c, inst);
-        /* This should probably be done in the parser. 
-         */
-        out->DstReg.WriteMask &= WRITEMASK_XYZ;
+        c->immediate[c->nr_immediates].nr = size;
+        c->nr_immediates++;
         break;
 
-      case TGSI_OPCODE_KIL: 
-        out = emit_insn(c, inst);
-        /* This should probably be done in the parser. 
-         */
-        out->DstReg.WriteMask = 0;
-        break;
-      case TGSI_OPCODE_END:
-        emit_fb_write(c);
-        break;
-      default:
-        if (brw_wm_is_scalar_result(inst->Opcode))
-           emit_scalar_insn(c, inst);
-        else
-           emit_insn(c, inst);
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         inst = &parse.FullToken.FullInstruction;
+        emit_insn(c, inst);
         break;
       }
    }
 
-   if (INTEL_DEBUG & DEBUG_WM) {
-      _mesa_printf("pass_fp:\n");
-      print_insns( c->prog_instructions, c->nr_fp_insns );
-      _mesa_printf("\n");
+   if (BRW_DEBUG & DEBUG_WM) {
+      brw_wm_print_fp_program( c, "pass_fp" );
+      debug_printf("\n");
    }
+
+   return c->error;
 }