i965g: wip on fragment shaders
author Keith Whitwell <keithw@vmware.com>
Sat, 31 Oct 2009 15:05:01 +0000 (15:05 +0000)
committer Keith Whitwell <keithw@vmware.com>
Sat, 31 Oct 2009 15:05:01 +0000 (15:05 +0000)
src/gallium/drivers/i965/brw_wm.h
src/gallium/drivers/i965/brw_wm_fp.c

index 2cd5bb708186a4ae4a6b07606b8f61afa276927d..8ee99420aa55aa0c6b1f5b2f28e49d1386358d8f 100644 (file)
@@ -74,6 +74,7 @@ struct brw_wm_prog_key {
 
    GLuint vp_nr_outputs:6;
    GLuint nr_cbufs:3;
+   GLuint has_flow_control:1;
 
    GLuint program_string_id;
 };
@@ -176,9 +177,36 @@ struct brw_wm_instruction {
 #define MAX_WM_OPCODE     (MAX_OPCODE + 9)
 
 #define BRW_FILE_PAYLOAD   (TGSI_FILE_COUNT)
-#define PAYLOAD_DEPTH      (FRAG_ATTRIB_MAX) /* ?? */
+#define PAYLOAD_DEPTH      (PIPE_MAX_SHADER_INPUTS) /* ?? */
+
+
+struct brw_fp_src {           /* source operand of a brw_fp_instruction */
+   unsigned file:4;           /* register file: a TGSI_FILE_* value or BRW_FILE_PAYLOAD */
+   unsigned index:16;         /* register index within that file */
+   unsigned swizzle:8;        /* four 2-bit channel selects, X in the low two bits */
+   unsigned indirect:1;       /* presumably relative addressing -- TODO confirm */
+   unsigned negate:1;         /* negate modifier */
+   unsigned abs:1;            /* absolute-value modifier */
+};
+
+struct brw_fp_dst {           /* destination operand of a brw_fp_instruction */
+   unsigned file:4;           /* register file: a TGSI_FILE_* value */
+   unsigned index:16;         /* register index within that file */
+   unsigned writemask:4;      /* BRW_WRITEMASK_* bits selecting the channels written */
+   unsigned indirect:1;       /* presumably relative addressing -- TODO confirm */
+   unsigned saturate:1;       /* saturate flag; presumably clamps the result -- TODO confirm */
+};
+
+struct brw_fp_instruction {   /* one instruction of the pass_fp program */
+   struct brw_fp_dst dst;      /* destination operand */
+   struct brw_fp_src src[3];   /* up to three sources; unused ones use TGSI_FILE_NULL */
+   unsigned opcode:8;          /* TGSI_OPCODE_* or a driver-private WM_* opcode */
+   unsigned tex_unit:4;        /* texture unit; 0 for non-texture instructions */
+   unsigned tex_target:4;      /* TGSI_TEXTURE_* target; 0 for non-texture instructions */
+   unsigned target:10;         /* destination surface for FB_WRITE */
+   unsigned eot:1;             /* mark last instruction (usually FB_WRITE) */
+};
 
-struct brw_passfp_program;
 
 struct brw_wm_compile {
    struct brw_compile func;
@@ -198,9 +226,26 @@ struct brw_wm_compile {
     * simplifying and adding instructions for interpolation and
     * framebuffer writes.
     */
-   struct brw_passfp_program *pass_fp;
-
-
+   struct {
+      GLfloat v[4];
+      unsigned nr;
+   } immediate[BRW_WM_MAX_CONST+3];
+   GLuint nr_immediates;
+   
+   struct brw_fp_instruction fp_instructions[BRW_WM_MAX_INSN];
+   GLuint nr_fp_insns;
+   GLuint fp_temp;
+   GLuint fp_interp_emitted;
+   GLuint fp_fragcolor_emitted;
+   GLuint fp_first_internal_temp;
+
+   struct brw_fp_src fp_pixel_xy;
+   struct brw_fp_src fp_delta_xy;
+   struct brw_fp_src fp_pixel_w;
+
+
+   /* Subsequent passes using SSA representation:
+    */
    struct brw_wm_value vreg[BRW_WM_MAX_VREG];
    GLuint nr_vreg;
 
@@ -213,7 +258,7 @@ struct brw_wm_compile {
    } payload;
 
 
-   const struct brw_wm_ref *pass0_fp_reg[PROGRAM_PAYLOAD+1][256][4];
+   const struct brw_wm_ref *pass0_fp_reg[BRW_FILE_PAYLOAD+1][256][4];
 
    struct brw_wm_ref undef_ref;
    struct brw_wm_value undef_value;
@@ -241,7 +286,7 @@ struct brw_wm_compile {
    struct {
       GLboolean inited;
       struct brw_reg reg;
-   } wm_regs[PROGRAM_PAYLOAD+1][256][4];
+   } wm_regs[BRW_FILE_PAYLOAD+1][256][4];
 
    GLboolean used_grf[BRW_WM_MAX_GRF];
    GLuint first_free_grf;
@@ -258,13 +303,15 @@ struct brw_wm_compile {
       GLint index;
       struct brw_reg reg;
    } current_const[3];
+
+   GLuint error;
 };
 
 
 GLuint brw_wm_nr_args( GLuint opcode );
 GLuint brw_wm_is_scalar_result( GLuint opcode );
 
-void brw_wm_pass_fp( struct brw_wm_compile *c );
+int brw_wm_pass_fp( struct brw_wm_compile *c );
 void brw_wm_pass0( struct brw_wm_compile *c );
 void brw_wm_pass1( struct brw_wm_compile *c );
 void brw_wm_pass2( struct brw_wm_compile *c );
index 8ba037cdae7cfeb2d7142d18e2ab190394a1be52..57933afbbee4fb18ae745e262aebfad66b569cfc 100644 (file)
                
 
 #include "pipe/p_shader_tokens.h"
+#include "pipe/p_error.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_util.h"
 
 #include "brw_wm.h"
 #include "brw_util.h"
+#include "brw_debug.h"
 
 
 #define X    0
 #define Y    1
 #define Z    2
 #define W    3
+#define GET_SWZ(swz, comp) (((swz) >> ((comp)*2)) & 0x3)
 
 
 static const char *wm_opcode_strings[] = {
@@ -54,7 +65,294 @@ static const char *wm_opcode_strings[] = {
    "FRONTFACING",
 };
 
+/***********************************************************************
+ * Source regs
+ */
+
+/* Build a source operand for register 'idx' of register file 'file',
+ * using the BRW_SWIZZLE_XYZW swizzle and no modifiers.
+ */
+static struct brw_fp_src src_reg(GLuint file, GLuint idx)
+{
+   struct brw_fp_src src;
+
+   /* Which register. */
+   src.file = file;
+   src.index = idx;
+
+   /* No addressing or sign modifiers by default. */
+   src.indirect = 0;
+   src.abs = 0;
+   src.negate = 0;
+
+   src.swizzle = BRW_SWIZZLE_XYZW;
+
+   return src;
+}
+
+/* Source operand reading the register that 'dst' names.  Only the
+ * file and index carry over; writemask and saturate are dropped.
+ */
+static struct brw_fp_src src_reg_from_dst(struct brw_fp_dst dst)
+{
+   const GLuint file = dst.file;
+   const GLuint idx = dst.index;
+
+   return src_reg(file, idx);
+}
+
+/* The "no operand" source: register 0 of TGSI_FILE_NULL. */
+static struct brw_fp_src src_undef( void )
+{
+   struct brw_fp_src undef = src_reg(TGSI_FILE_NULL, 0);
+
+   return undef;
+}
+
+/* True when 'src' lives in TGSI_FILE_NULL, i.e. is an unused operand. */
+static GLboolean src_is_undef(struct brw_fp_src src)
+{
+   if (src.file != TGSI_FILE_NULL)
+      return GL_FALSE;
+
+   return GL_TRUE;
+}
+
+/* Apply a further swizzle on top of the one 'reg' already carries:
+ * each output channel picks up whatever the existing swizzle selects
+ * for the requested channel (x, y, z, w are channel numbers 0..3).
+ */
+static struct brw_fp_src src_swizzle( struct brw_fp_src reg, int x, int y, int z, int w )
+{
+   const unsigned old = reg.swizzle;
+   const unsigned sel_x = GET_SWZ(old, x);
+   const unsigned sel_y = GET_SWZ(old, y);
+   const unsigned sel_z = GET_SWZ(old, z);
+   const unsigned sel_w = GET_SWZ(old, w);
+
+   /* Two bits per channel, X in the low bits. */
+   reg.swizzle = sel_x | (sel_y << 2) | (sel_z << 4) | (sel_w << 6);
+
+   return reg;
+}
+
+/* Replicate channel 'x' of 'reg' across all four channels. */
+static struct brw_fp_src src_scalar( struct brw_fp_src reg, int x )
+{
+   const int chan = x;
+
+   return src_swizzle(reg, chan, chan, chan, chan);
+}
+
+/* Absolute-value form of 'src'.  Any negate flag already set on the
+ * operand is cleared.
+ */
+static struct brw_fp_src src_abs( struct brw_fp_src src )
+{
+   struct brw_fp_src result = src;
+
+   result.abs = 1;
+   result.negate = 0;
+
+   return result;
+}
+
+/* Negated form of 'src'.  The negate flag is set unconditionally (it
+ * is not toggled) and any abs flag on the operand is cleared.
+ */
+static struct brw_fp_src src_negate( struct brw_fp_src src )
+{
+   struct brw_fp_src result = src;
+
+   result.abs = 0;
+   result.negate = 1;
+
+   return result;
+}
+
+
+/* Try to express the 'nr' floats in 'v' as a swizzle of the immediate
+ * vector 'v2', which currently holds '*nr2' valid elements (at most
+ * four).  Values of 'v' not already present are appended while room
+ * remains.
+ *
+ * Returns TRUE on success, with '*swizzle' holding one 2-bit selector
+ * per element of 'v' (element i in bits 2i..2i+1) and 'v2' / '*nr2'
+ * updated with any appended values.
+ *
+ * Returns FALSE if the values do not fit.  In that case 'v2' and
+ * '*nr2' are left exactly as they were, so a failed attempt cannot
+ * use up free slots of an immediate that ends up not being
+ * referenced; '*swizzle' is zero and carries no meaning.
+ */
+static int match_or_expand_immediate( const float *v,
+                                      unsigned nr,
+                                      float *v2,
+                                      unsigned *nr2,
+                                      unsigned *swizzle )
+{
+   float expanded[4];
+   unsigned nr_expanded = *nr2;
+   unsigned swz = 0;
+   unsigned i, j;
+
+   *swizzle = 0;
+
+   if (nr_expanded > 4)
+      return FALSE;
+
+   /* Work on a private copy and commit only once every element of
+    * 'v' has found a home.
+    */
+   for (j = 0; j < nr_expanded; j++)
+      expanded[j] = v2[j];
+
+   for (i = 0; i < nr; i++) {
+      /* Reuse the first matching element, if there is one. */
+      for (j = 0; j < nr_expanded; j++) {
+         if (v[i] == expanded[j])
+            break;
+      }
+
+      /* No match: j now indexes the first free slot. */
+      if (j == nr_expanded) {
+         if (nr_expanded >= 4)
+            return FALSE;
+
+         expanded[nr_expanded++] = v[i];
+      }
+
+      swz |= j << (i * 2);
+   }
+
+   /* Success: publish the newly appended values. */
+   for (j = *nr2; j < nr_expanded; j++)
+      v2[j] = expanded[j];
+
+   *nr2 = nr_expanded;
+   *swizzle = swz;
+
+   return TRUE;
+}
+
+
+
+/* Internally generated immediates: overkill...
+ *
+ * Returns a source operand in TGSI_FILE_IMMEDIATE whose swizzle picks
+ * out the 'nr' values in 'v'.  Existing immediates are reused (and
+ * expanded) where possible, otherwise a new one is claimed.  If no
+ * immediate can hold the values, c->error is set and an undefined
+ * source is returned.
+ */
+static struct brw_fp_src src_imm( struct brw_wm_compile *c, 
+                                 const GLfloat *v, 
+                                 unsigned nr)
+{
+   unsigned slot;
+   unsigned chan;
+   unsigned swizzle;
+   int placed = 0;
+
+   /* Could do a first pass where we examine all existing immediates
+    * without expanding.
+    */
+   for (slot = 0; slot < c->nr_immediates; slot++) {
+      if (match_or_expand_immediate( v,
+                                     nr,
+                                     c->immediate[slot].v,
+                                     &c->immediate[slot].nr,
+                                     &swizzle )) {
+         placed = 1;
+         break;
+      }
+   }
+
+   /* Nothing suitable yet: claim the next unused immediate, if any. */
+   if (!placed && c->nr_immediates < Elements(c->immediate)) {
+      slot = c->nr_immediates++;
+
+      if (match_or_expand_immediate( v,
+                                     nr,
+                                     c->immediate[slot].v,
+                                     &c->immediate[slot].nr,
+                                     &swizzle ))
+         placed = 1;
+   }
+
+   if (!placed) {
+      c->error = 1;
+      return src_undef();
+   }
+
+   /* Make sure that all referenced elements are from this immediate.
+    * Has the effect of making size-one immediates into scalars.
+    */
+   for (chan = nr; chan < 4; chan++)
+      swizzle |= (swizzle & 0x3) << (chan * 2);
+
+   return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE, slot ),
+                       GET_SWZ(swizzle, X),
+                       GET_SWZ(swizzle, Y),
+                       GET_SWZ(swizzle, Z),
+                       GET_SWZ(swizzle, W) );
+}
+
+
+
+/* Immediate source holding the single float 'f'. */
+static struct brw_fp_src src_imm1f( struct brw_wm_compile *c,
+                                   GLfloat f )
+{
+   const GLfloat value[1] = { f };
+
+   return src_imm(c, value, 1);
+}
+
+/* Immediate source holding the four floats (x, y, z, w). */
+static struct brw_fp_src src_imm4f( struct brw_wm_compile *c,
+                                   GLfloat x,
+                                   GLfloat y,
+                                   GLfloat z,
+                                   GLfloat w)
+{
+   GLfloat vec[4];
+
+   vec[X] = x;
+   vec[Y] = y;
+   vec[Z] = z;
+   vec[W] = w;
+
+   return src_imm(c, vec, 4);
+}
+
+
+
+/***********************************************************************
+ * Dest regs
+ */
+
+/* Build a destination operand for register 'idx' of register file
+ * 'file', writing all four channels, with no indirection and no
+ * saturation.
+ */
+static struct brw_fp_dst dst_reg(GLuint file, GLuint idx)
+{
+   struct brw_fp_dst reg;
+   reg.file = file;
+   reg.index = idx;
+   reg.writemask = BRW_WRITEMASK_XYZW;
+   reg.indirect = 0;
+   /* Every field must be set: 'reg' is an automatic variable, so an
+    * unset saturate bit would be indeterminate and copied into every
+    * instruction emitted with this destination.
+    */
+   reg.saturate = 0;
+   return reg;
+}
+
+/* Restrict 'reg' to the channels it already writes that are also set
+ * in 'mask' (a combination of BRW_WRITEMASK_* bits).
+ */
+static struct brw_fp_dst dst_mask( struct brw_fp_dst reg, int mask )
+{
+   const unsigned kept = reg.writemask & mask;
+
+   reg.writemask = kept;
+   return reg;
+}
+
+/* The "no destination" operand: register 0 of TGSI_FILE_NULL. */
+static struct brw_fp_dst dst_undef( void )
+{
+   struct brw_fp_dst undef = dst_reg(TGSI_FILE_NULL, 0);
+
+   return undef;
+}
+
+/* True when 'dst' lives in TGSI_FILE_NULL, i.e. names no register. */
+static boolean dst_is_undef( struct brw_fp_dst dst )
+{
+   if (dst.file != TGSI_FILE_NULL)
+      return FALSE;
+
+   return TRUE;
+}
+
+/* Copy of 'reg' with its saturate flag replaced by 'flag'. */
+static struct brw_fp_dst dst_saturate( struct brw_fp_dst reg, boolean flag )
+{
+   struct brw_fp_dst out = reg;
+
+   out.saturate = flag;
+   return out;
+}
+
+/* Allocate an internal temporary register.
+ *
+ * c->fp_temp is a bitmask of the internal temporaries currently in
+ * use; bit N stands for TGSI_FILE_TEMPORARY register
+ * c->fp_first_internal_temp + N.  The lowest clear bit is claimed.
+ *
+ * If no bit is free, ffs() returns 0.  Continuing would shift by -1
+ * (undefined behaviour) and hand out the register just below the
+ * internal range, so instead the compile is flagged as failed via
+ * c->error and an undefined destination is returned.
+ */
+static struct brw_fp_dst get_temp( struct brw_wm_compile *c )
+{
+   int bit = ffs( ~c->fp_temp );
+
+   if (!bit) {
+      debug_printf("%s: out of temporaries\n", __FILE__);
+      c->error = 1;
+      return dst_undef();
+   }
+
+   /* Unsigned constant: bit may be 32, and 1<<31 overflows an int. */
+   c->fp_temp |= 1u << (bit-1);
+   return dst_reg(TGSI_FILE_TEMPORARY, c->fp_first_internal_temp+(bit-1));
+}
+
+
+/* Return a temporary obtained from get_temp() to the pool.  The
+ * undefined destination handed out on allocation failure owns no bit
+ * in c->fp_temp and is ignored.
+ */
+static void release_temp( struct brw_wm_compile *c, struct brw_fp_dst temp )
+{
+   if (dst_is_undef(temp))
+      return;
+
+   c->fp_temp &= ~(1u << (temp.index - c->fp_first_internal_temp));
+}
+
+
+/***********************************************************************
+ * Instructions 
+ */
+
+/* Hand out the next free slot of c->fp_instructions and bump
+ * c->nr_fp_insns.
+ *
+ * The array has a fixed size.  Once it is full the compile is flagged
+ * as failed via c->error and the last slot is handed out again, so
+ * callers (which always write through the returned pointer) stay
+ * inside the array instead of overrunning it.
+ */
+static struct brw_fp_instruction *get_fp_inst(struct brw_wm_compile *c)
+{
+   if (c->nr_fp_insns >= Elements(c->fp_instructions)) {
+      c->error = 1;
+      return &c->fp_instructions[Elements(c->fp_instructions) - 1];
+   }
+
+   return &c->fp_instructions[c->nr_fp_insns++];
+}
+
+/* Append one instruction to the pass_fp program and return it.
+ *
+ * Fills in the opcode, destination, texture unit/target and all three
+ * source operands.  The 'target' and 'eot' fields of the instruction
+ * are not written here.
+ */
+static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c,
+                                            GLuint op,
+                                            struct brw_fp_dst dest,
+                                            GLuint tex_src_unit,
+                                            GLuint tex_src_target,
+                                            struct brw_fp_src src0,
+                                            struct brw_fp_src src1,
+                                            struct brw_fp_src src2 )
+{
+   struct brw_fp_instruction *insn = get_fp_inst(c);
+
+   /* What to do and where the result goes. */
+   insn->opcode = op;
+   insn->dst = dest;
+
+   /* Operands. */
+   insn->src[0] = src0;
+   insn->src[1] = src1;
+   insn->src[2] = src2;
+
+   /* Texture state; callers pass 0, 0 for non-texture opcodes. */
+   insn->tex_unit = tex_src_unit;
+   insn->tex_target = tex_src_target;
+
+   return insn;
+}
+   
+
+/* Emit a non-texture instruction taking three source operands. */
+static INLINE void emit_op3(struct brw_wm_compile *c,
+                           GLuint op,
+                           struct brw_fp_dst dest,
+                           struct brw_fp_src src0,
+                           struct brw_fp_src src1,
+                           struct brw_fp_src src2 )
+{
+   /* Texture unit and target are unused here, so pass 0 for both. */
+   (void) emit_tex_op(c, op, dest,
+                      0, 0,
+                      src0, src1, src2);
+}
+
+
+/* Emit a non-texture instruction taking two source operands; the
+ * third source slot is left undefined.
+ */
+static INLINE void emit_op2(struct brw_wm_compile *c,
+                           GLuint op,
+                           struct brw_fp_dst dest,
+                           struct brw_fp_src src0,
+                           struct brw_fp_src src1)
+{
+   const struct brw_fp_src unused = src_undef();
+
+   (void) emit_tex_op(c, op, dest, 0, 0, src0, src1, unused);
+}
+
+/* Emit a non-texture instruction taking one source operand; the
+ * remaining two source slots are left undefined.
+ */
+static INLINE void emit_op1(struct brw_wm_compile *c,
+                           GLuint op,
+                           struct brw_fp_dst dest,
+                           struct brw_fp_src src0)
+{
+   const struct brw_fp_src unused = src_undef();
+
+   (void) emit_tex_op(c, op, dest, 0, 0, src0, unused, unused);
+}
+
+/* Emit a non-texture instruction with no source operands; all three
+ * source slots are left undefined.
+ */
+static INLINE void emit_op0(struct brw_wm_compile *c,
+                          GLuint op,
+                          struct brw_fp_dst dest)
+{
+   const struct brw_fp_src unused = src_undef();
+
+   (void) emit_tex_op(c, op, dest, 0, 0, unused, unused, unused);
+}
 
 
 
@@ -66,10 +364,10 @@ static const char *wm_opcode_strings[] = {
  */
 static void emit_scalar_insn(struct brw_wm_compile *c,
                             unsigned opcode,
-                            struct brw_dst dst,
-                            struct brw_src src0,
-                            struct brw_src src1,
-                            struct brw_src src2 )
+                            struct brw_fp_dst dst,
+                            struct brw_fp_src src0,
+                            struct brw_fp_src src1,
+                            struct brw_fp_src src2 )
 {
    unsigned first_chan = ffs(dst.writemask) - 1;
    unsigned first_mask = 1 << first_chan;
@@ -77,14 +375,14 @@ static void emit_scalar_insn(struct brw_wm_compile *c,
    if (dst.writemask == 0)
       return;
 
-   emit_op( c, opcode,
-           brw_writemask(dst, first_mask),
-           src0, src1, src2 );
+   emit_op3( c, opcode,
+            dst_mask(dst, first_mask),
+            src0, src1, src2 );
 
    if (dst.writemask != first_mask) {
       emit_op1(c, TGSI_OPCODE_MOV,
-              brw_writemask(dst, ~first_mask),
-              src_swizzle1(brw_src(dst), first_chan));
+              dst_mask(dst, ~first_mask),
+              src_scalar(src_reg_from_dst(dst), first_chan));
    }
 }
 
@@ -93,11 +391,11 @@ static void emit_scalar_insn(struct brw_wm_compile *c,
  * Special instructions for interpolation and other tasks
  */
 
-static struct ureg_src get_pixel_xy( struct brw_wm_compile *c )
+static struct brw_fp_src get_pixel_xy( struct brw_wm_compile *c )
 {
-   if (src_is_undef(c->pixel_xy)) {
-      struct ureg_dst pixel_xy = get_temp(c);
-      struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH);
+   if (src_is_undef(c->fp_pixel_xy)) {
+      struct brw_fp_dst pixel_xy = get_temp(c);
+      struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
       
       
       /* Emit the out calculations, and hold onto the results.  Use
@@ -105,79 +403,85 @@ static struct ureg_src get_pixel_xy( struct brw_wm_compile *c )
        */   
       /* pixel_xy.xy = PIXELXY payload[0];
        */
-      emit_op(c,
-             WM_PIXELXY,
-             dst_mask(pixel_xy, BRW_WRITEMASK_XY),
-             payload_r0_depth,
-             src_undef(),
-             src_undef());
+      emit_op1(c,
+              WM_PIXELXY,
+              dst_mask(pixel_xy, BRW_WRITEMASK_XY),
+              payload_r0_depth);
 
-      c->pixel_xy = src_reg_from_dst(pixel_xy);
+      c->fp_pixel_xy = src_reg_from_dst(pixel_xy);
    }
 
-   return c->pixel_xy;
+   return c->fp_pixel_xy;
 }
 
-static struct ureg_src get_delta_xy( struct brw_wm_compile *c )
+static struct brw_fp_src get_delta_xy( struct brw_wm_compile *c )
 {
-   if (src_is_undef(c->delta_xy)) {
-      struct ureg_dst delta_xy = get_temp(c);
-      struct ureg_src pixel_xy = get_pixel_xy(c);
-      struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH);
+   if (src_is_undef(c->fp_delta_xy)) {
+      struct brw_fp_dst delta_xy = get_temp(c);
+      struct brw_fp_src pixel_xy = get_pixel_xy(c);
+      struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
       
       /* deltas.xy = DELTAXY pixel_xy, payload[0]
        */
-      emit_op(c,
+      emit_op3(c,
              WM_DELTAXY,
              dst_mask(delta_xy, BRW_WRITEMASK_XY),
              pixel_xy, 
              payload_r0_depth,
              src_undef());
       
-      c->delta_xy = src_reg_from_dst(delta_xy);
+      c->fp_delta_xy = src_reg_from_dst(delta_xy);
    }
 
-   return c->delta_xy;
+   return c->fp_delta_xy;
 }
 
-static struct ureg_src get_pixel_w( struct brw_wm_compile *c )
+static struct brw_fp_src get_pixel_w( struct brw_wm_compile *c )
 {
-   if (src_is_undef(c->pixel_w)) {
-      struct ureg_dst pixel_w = get_temp(c);
-      struct ureg_src deltas = get_delta_xy(c);
-      struct ureg_src interp_wpos = src_reg(TGSI_FILE_PAYLOAD, FRAG_ATTRIB_WPOS);
+   if (src_is_undef(c->fp_pixel_w)) {
+      struct brw_fp_dst pixel_w = get_temp(c);
+      struct brw_fp_src deltas = get_delta_xy(c);
+
+      /* XXX: assuming position is always first -- valid? 
+       */
+      struct brw_fp_src interp_wpos = src_reg(BRW_FILE_PAYLOAD, 0);
 
       /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
        */
-      emit_op(c,
-             WM_PIXELW,
-             dst_mask(pixel_w, BRW_WRITEMASK_W),
-             interp_wpos,
-             deltas, 
-             src_undef());
+      emit_op3(c,
+              WM_PIXELW,
+              dst_mask(pixel_w, BRW_WRITEMASK_W),
+              interp_wpos,
+              deltas, 
+              src_undef());
       
 
-      c->pixel_w = src_reg_from_dst(pixel_w);
+      c->fp_pixel_w = src_reg_from_dst(pixel_w);
    }
 
-   return c->pixel_w;
+   return c->fp_pixel_w;
 }
 
+
+/***********************************************************************
+ * Emit INTERP instructions ahead of first use of each attrib.
+ */
+
 static void emit_interp( struct brw_wm_compile *c,
+                        GLuint idx,
                         GLuint semantic,
-                        GLuint semantic_index,
                         GLuint interp_mode )
 {
-   struct ureg_dst dst = dst_reg(TGSI_FILE_INPUT, idx);
-   struct ureg_src interp = src_reg(TGSI_FILE_PAYLOAD, idx);
-   struct ureg_src deltas = get_delta_xy(c);
+   struct brw_fp_dst dst = dst_reg(TGSI_FILE_INPUT, idx);
+   struct brw_fp_src interp = src_reg(BRW_FILE_PAYLOAD, idx);
+   struct brw_fp_src deltas = get_delta_xy(c);
 
    /* Need to use PINTERP on attributes which have been
     * multiplied by 1/W in the SF program, and LINTERP on those
     * which have not:
     */
    switch (semantic) {
-   case FRAG_ATTRIB_WPOS:
+   case TGSI_SEMANTIC_POSITION:
       /* Have to treat wpos.xy specially:
        */
       emit_op1(c,
@@ -218,7 +522,8 @@ static void emit_interp( struct brw_wm_compile *c,
       }
 
       break;
-   case FRAG_ATTRIB_FOGC:
+
+   case TGSI_SEMANTIC_FOG:
       /* Interpolate the fog coordinate */
       emit_op3(c,
              WM_PINTERP,
@@ -228,17 +533,17 @@ static void emit_interp( struct brw_wm_compile *c,
              get_pixel_w(c));
 
       emit_op1(c,
-             TGSI_OPCODE_MOV,
-             dst_mask(dst, BRW_WRITEMASK_YZ),
-             brw_imm1f(0.0));
+              TGSI_OPCODE_MOV,
+              dst_mask(dst, BRW_WRITEMASK_YZ),
+              src_imm1f(c, 0.0));
 
       emit_op1(c,
-             TGSI_OPCODE_MOV,
-             dst_mask(dst, BRW_WRITEMASK_W),
-             brw_imm1f(1.0));
+              TGSI_OPCODE_MOV,
+              dst_mask(dst, BRW_WRITEMASK_W),
+              src_imm1f(c, 1.0));
       break;
 
-   case FRAG_ATTRIB_FACE:
+   case TGSI_SEMANTIC_FACE:
       /* XXX review/test this case */
       emit_op0(c,
               WM_FRONTFACING,
@@ -247,15 +552,15 @@ static void emit_interp( struct brw_wm_compile *c,
       emit_op1(c,
              TGSI_OPCODE_MOV,
              dst_mask(dst, BRW_WRITEMASK_YZ),
-             brw_imm1f(0.0));
+              src_imm1f(c, 0.0));
 
       emit_op1(c,
              TGSI_OPCODE_MOV,
              dst_mask(dst, BRW_WRITEMASK_W),
-             brw_imm1f(1.0));
+              src_imm1f(c, 1.0));
       break;
 
-   case FRAG_ATTRIB_PNTC:
+   case TGSI_SEMANTIC_PSIZE:
       /* XXX review/test this case */
       emit_op3(c,
               WM_PINTERP,
@@ -267,12 +572,12 @@ static void emit_interp( struct brw_wm_compile *c,
       emit_op1(c,
              TGSI_OPCODE_MOV,
              dst_mask(dst, BRW_WRITEMASK_Z),
-             brw_imm1f(c->pass_fp, 0.0f));
+             src_imm1f(c, 0.0f));
 
       emit_op1(c,
              TGSI_OPCODE_MOV,
              dst_mask(dst, BRW_WRITEMASK_W),
-             brw_imm1f(c->pass_fp, 1.0f));
+             src_imm1f(c, 1.0f));
       break;
 
    default: 
@@ -310,11 +615,11 @@ static void emit_interp( struct brw_wm_compile *c,
  * Expand various instructions here to simpler forms.  
  */
 static void precalc_dst( struct brw_wm_compile *c,
-                        struct brw_dst dst,
-                        struct brw_src src0,
-                        struct brw_src src1 )
+                        struct brw_fp_dst dst,
+                        struct brw_fp_src src0,
+                        struct brw_fp_src src1 )
 {
-   if (dst.WriteMask & BRW_WRITEMASK_Y) {      
+   if (dst.writemask & BRW_WRITEMASK_Y) {      
       /* dst.y = mul src0.y, src1.y
        */
       emit_op2(c,
@@ -324,25 +629,22 @@ static void precalc_dst( struct brw_wm_compile *c,
               src1);
    }
 
-   if (dst.WriteMask & BRW_WRITEMASK_XZ) {
-      struct prog_instruction *swz;
-      GLuint z = GET_SWZ(src0.Swizzle, Z);
-
+   if (dst.writemask & BRW_WRITEMASK_XZ) {
       /* dst.z = mov src0.zzzz
        */
       emit_op1(c,
              TGSI_OPCODE_MOV,
              dst_mask(dst, BRW_WRITEMASK_Z),
-             src_swizzle1(src0, Z));
+             src_scalar(src0, Z));
 
-      /* dst.x = immf(1.0)
+      /* dst.x = imm1f(1.0)
        */
       emit_op1(c,
              TGSI_OPCODE_MOV,
-             brw_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0),
-             src_immf(c, 1.0));
+             dst_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0),
+             src_imm1f(c, 1.0));
    }
-   if (dst.WriteMask & BRW_WRITEMASK_W) {
+   if (dst.writemask & BRW_WRITEMASK_W) {
       /* dst.w = mov src1.w
        */
       emit_op1(c,
@@ -354,22 +656,22 @@ static void precalc_dst( struct brw_wm_compile *c,
 
 
 static void precalc_lit( struct brw_wm_compile *c,
-                        struct ureg_dst dst,
-                        struct ureg_src src0 )
+                        struct brw_fp_dst dst,
+                        struct brw_fp_src src0 )
 {
-   if (dst.WriteMask & BRW_WRITEMASK_XW) {
+   if (dst.writemask & BRW_WRITEMASK_XW) {
       /* dst.xw = imm(1.0f)
        */
       emit_op1(c,
               TGSI_OPCODE_MOV,
-              brw_saturate(brw_writemask(dst, BRW_WRITEMASK_XW), 0),
-              brw_imm1f(1.0f));
+              dst_saturate(dst_mask(dst, BRW_WRITEMASK_XW), 0),
+              src_imm1f(c, 1.0f));
    }
 
-   if (dst.WriteMask & BRW_WRITEMASK_YZ) {
+   if (dst.writemask & BRW_WRITEMASK_YZ) {
       emit_op1(c,
               TGSI_OPCODE_LIT,
-              brw_writemask(dst, BRW_WRITEMASK_YZ),
+              dst_mask(dst, BRW_WRITEMASK_YZ),
               src0);
    }
 }
@@ -382,41 +684,42 @@ static void precalc_lit( struct brw_wm_compile *c,
  * instruction itself.
  */
 static void precalc_tex( struct brw_wm_compile *c,
-                        struct brw_dst dst,
+                        struct brw_fp_dst dst,
+                        unsigned target,
                         unsigned unit,
-                        struct brw_src src0 )
+                        struct brw_fp_src src0 )
 {
-   struct ureg_src coord = src_undef();
-   struct ureg_dst tmp = dst_undef();
+   struct brw_fp_src coord = src_undef();
+   struct brw_fp_dst tmp = dst_undef();
 
    assert(unit < BRW_MAX_TEX_UNIT);
 
    /* Cubemap: find longest component of coord vector and normalize
     * it.
     */
-   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
-      struct ureg_src tmpsrc;
+   if (target == TGSI_TEXTURE_CUBE) {
+      struct brw_fp_src tmpsrc;
 
       tmp = get_temp(c);
-      tmpsrc = brw_src(tmpcoord)
+      tmpsrc = src_reg_from_dst(tmp);
 
       /* tmp = abs(src0) */
       emit_op1(c, 
               TGSI_OPCODE_MOV,
               tmp,
-              brw_abs(src0));
+              src_abs(src0));
 
       /* tmp.X = MAX(tmp.X, tmp.Y) */
       emit_op2(c, TGSI_OPCODE_MAX,
-              brw_writemask(tmp, BRW_WRITEMASK_X),
-              src_swizzle1(tmpsrc, X),
-              src_swizzle1(tmpsrc, Y));
+              dst_mask(tmp, BRW_WRITEMASK_X),
+              src_scalar(tmpsrc, X),
+              src_scalar(tmpsrc, Y));
 
       /* tmp.X = MAX(tmp.X, tmp.Z) */
       emit_op2(c, TGSI_OPCODE_MAX,
-              brw_writemask(tmp, BRW_WRITEMASK_X),
+              dst_mask(tmp, BRW_WRITEMASK_X),
               tmpsrc,
-              src_swizzle1(tmpsrc, Z));
+              src_scalar(tmpsrc, Z));
 
       /* tmp.X = 1 / tmp.X */
       emit_op1(c, TGSI_OPCODE_RCP,
@@ -427,11 +730,12 @@ static void precalc_tex( struct brw_wm_compile *c,
       emit_op2(c, TGSI_OPCODE_MUL,
               tmp,
               src0,
-              src_swizzle1(tmpsrc, SWIZZLE_X));
+              src_scalar(tmpsrc, X));
 
       coord = tmpsrc;
    }
-   else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
+   else if (target == TGSI_TEXTURE_RECT ||
+           target == TGSI_TEXTURE_SHADOWRECT) {
       /* XXX: need a mechanism for internally generated constants.
        */
       coord = src0;
@@ -448,19 +752,18 @@ static void precalc_tex( struct brw_wm_compile *c,
    if (c->key.yuvtex_mask & (1 << unit)) {
       /* convert ycbcr to RGBA */
       GLboolean  swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
-      struct ureg_dst dst = inst->DstReg;
-      struct ureg_dst tmp = get_temp(c);
-      struct ureg_src tmpsrc = src_reg_from_dst(tmp);
-      struct ureg_src C0 = ureg_imm4f( c->ureg,  -.5, -.0625, -.5, 1.164 );
-      struct ureg_src C1 = ureg_imm4f( c->ureg, 1.596, -0.813, 2.018, -.391 );
+      struct brw_fp_dst tmp = get_temp(c);
+      struct brw_fp_src tmpsrc = src_reg_from_dst(tmp);
+      struct brw_fp_src C0 = src_imm4f( c,  -.5, -.0625, -.5, 1.164 );
+      struct brw_fp_src C1 = src_imm4f( c, 1.596, -0.813, 2.018, -.391 );
      
       /* tmp     = TEX ...
        */
       emit_tex_op(c, 
                   TGSI_OPCODE_TEX,
-                  brw_saturate(tmp, dst.Saturate),
+                  dst_saturate(tmp, dst.saturate),
                   unit,
-                  inst->TexSrcTarget,
+                  target,
                   coord,
                   src_undef(),
                   src_undef());
@@ -477,7 +780,7 @@ static void precalc_tex( struct brw_wm_compile *c,
       emit_op2(c, TGSI_OPCODE_MUL,
               dst_mask(tmp, BRW_WRITEMASK_Y),
               tmpsrc,
-              src_swizzle1(C0, W));
+              src_scalar(C0, W));
 
       /* 
        * if (UV swaped)
@@ -492,16 +795,16 @@ static void precalc_tex( struct brw_wm_compile *c,
                 src_swizzle(tmpsrc, Z,Z,X,X) : 
                 src_swizzle(tmpsrc, X,X,Z,Z)),
               C1,
-              src_swizzle1(tmpsrc, Y));
+              src_scalar(tmpsrc, Y));
 
       /*  RGB.y   = MAD YUV.z, C1.w, RGB.y
        */
       emit_op3(c,
               TGSI_OPCODE_MAD,
               dst_mask(dst, BRW_WRITEMASK_Y),
-              src_swizzle1(tmpsrc, Z),
-              src_swizzle1(C1, W),
-              src_swizzle1(src_reg_from_dst(dst), Y));
+              src_scalar(tmpsrc, Z),
+              src_scalar(C1, W),
+              src_scalar(src_reg_from_dst(dst), Y));
 
       release_temp(c, tmp);
    }
@@ -509,9 +812,9 @@ static void precalc_tex( struct brw_wm_compile *c,
       /* ordinary RGBA tex instruction */
       emit_tex_op(c, 
                   TGSI_OPCODE_TEX,
-                  inst->DstReg,
+                  dst,
                   unit,
-                  inst->TexSrcTarget,
+                  target,
                   coord,
                   src_undef(),
                   src_undef());
@@ -523,8 +826,8 @@ static void precalc_tex( struct brw_wm_compile *c,
 
    /* Release this temp if we ended up allocating it:
     */
-   if (!brw_dst_is_undef(tmpcoord))
-      release_temp(c, tmpcoord);
+   if (!dst_is_undef(tmp))
+      release_temp(c, tmp);
 }
 
 
@@ -532,13 +835,9 @@ static void precalc_tex( struct brw_wm_compile *c,
  * Check if the given TXP instruction really needs the divide-by-W step.
  */
 static GLboolean projtex( struct brw_wm_compile *c,
-                         const struct prog_instruction *inst )
+                         unsigned target, 
+                         struct brw_fp_src src )
 {
-   const struct ureg_src src = inst->SrcReg[0];
-   GLboolean retVal;
-
-   assert(inst->Opcode == TGSI_OPCODE_TXP);
-
    /* Only try to detect the simplest cases.  Could detect (later)
     * cases where we are trying to emit code like RCP {1.0}, MUL x,
     * {1.0}, and so on.
@@ -546,16 +845,15 @@ static GLboolean projtex( struct brw_wm_compile *c,
     * More complex cases than this typically only arise from
     * user-provided fragment programs anyway:
     */
-   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
-      retVal = GL_FALSE;  /* ut2004 gun rendering !?! */
-   else if (src.File == TGSI_FILE_INPUT && 
-           GET_SWZ(src.Swizzle, W) == W &&
-            (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
-      retVal = GL_FALSE;
-   else
-      retVal = GL_TRUE;
-
-   return retVal;
+   if (target == TGSI_TEXTURE_CUBE)
+      return GL_FALSE;  /* ut2004 gun rendering !?! */
+   
+   if (src.file == TGSI_FILE_INPUT && 
+       GET_SWZ(src.swizzle, W) == W &&
+       (c->key.proj_attrib_mask & (1 << src.index)) == 0)
+      return GL_FALSE;
+
+   return GL_TRUE;
 }
 
 
@@ -563,110 +861,168 @@ static GLboolean projtex( struct brw_wm_compile *c,
  * Emit code for TXP.
  */
 static void precalc_txp( struct brw_wm_compile *c,
-                              const struct prog_instruction *inst )
+                        struct brw_fp_dst dst,
+                        unsigned target,
+                        unsigned unit,
+                        struct brw_fp_src src0 )
 {
-   struct ureg_src src0 = inst->SrcReg[0];
-
-   if (projtex(c, inst)) {
-      struct ureg_dst tmp = get_temp(c);
-      struct prog_instruction tmp_inst;
+   if (projtex(c, target, src0)) {
+      struct brw_fp_dst tmp = get_temp(c);
 
       /* tmp0.w = RCP inst.arg[0][3]
        */
-      emit_op(c,
+      emit_op1(c,
              TGSI_OPCODE_RCP,
              dst_mask(tmp, BRW_WRITEMASK_W),
-             src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
-             src_undef(),
-             src_undef());
+             src_scalar(src0, W));
 
       /* tmp0.xyz =  MUL inst.arg[0], tmp0.wwww
        */
-      emit_op(c,
-             TGSI_OPCODE_MUL,
-             dst_mask(tmp, BRW_WRITEMASK_XYZ),
-             src0,
-             src_swizzle1(src_reg_from_dst(tmp), W),
-             src_undef());
+      emit_op2(c,
+              TGSI_OPCODE_MUL,
+              dst_mask(tmp, BRW_WRITEMASK_XYZ),
+              src0,
+              src_scalar(src_reg_from_dst(tmp), W));
 
-      /* dst = precalc(TEX tmp0)
+      /* dst = TEX tmp0 (tmp0.xyz now holds src0.xyz scaled by 1/src0.w)
        */
-      tmp_inst = *inst;
-      tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
-      precalc_tex(c, &tmp_inst);
+      precalc_tex(c, 
+                 dst,
+                 target,
+                 unit,
+                 src_reg_from_dst(tmp));
 
       release_temp(c, tmp);
    }
    else
    {
-      /* dst = precalc(TEX src0)
+      /* dst = TEX src0 (projtex() reported that no divide is needed)
        */
-      precalc_tex(c, inst);
+      precalc_tex(c, dst, target, unit, src0);
    }
 }
 
 
+/* XXX: note this returns a src_reg, even though it names an output. */
+static struct brw_fp_src
+find_output_by_semantic( struct brw_wm_compile *c,
+                        unsigned semantic,
+                        unsigned index )
+{
+   const struct tgsi_shader_info *info = &c->fp->info;
+   unsigned slot;
+
+   for (slot = 0; slot < info->num_outputs; slot++) {
+      if (info->output_semantic_name[slot] != semantic ||
+          info->output_semantic_index[slot] != index)
+         continue;
+      return src_reg( TGSI_FILE_OUTPUT, slot );
+   }
+
+   /* No such output: hand back an arbitrary immediate (1.0) instead. */
+   return src_imm1f(c, 1.0);
+}
+
 
 static void emit_fb_write( struct brw_wm_compile *c )
 {
-   struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH);
-   struct ureg_src outdepth = src_reg(TGSI_FILE_OUTPUT, FRAG_RESULT_DEPTH);
-   struct ureg_src outcolor;
-   struct prog_instruction *inst;
+   struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
+   struct brw_fp_src outdepth = find_output_by_semantic(c, TGSI_SEMANTIC_POSITION, 0);
    GLuint i;
 
 
-   /* The inst->Aux field is used for FB write target and the EOT marker */
+   outdepth = src_scalar(outdepth, Z);
 
    for (i = 0 ; i < c->key.nr_cbufs; i++) {
-      outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i);
+      struct brw_fp_src outcolor;
+      unsigned target = 1<<i;
 
-      inst = emit_op(c, WM_FB_WRITE,
-                    dst_mask(dst_undef(), 0),
-                    outcolor,
-                    payload_r0_depth,
-                    outdepth);
+      /* Set EOT flag on last inst:
+       */
+      if (i == c->key.nr_cbufs - 1)
+        target |= 1;
+      
+      outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i);
 
-      inst->Aux = (i<<1);
+      /* Use emit_tex_op so that we can specify the inst->tex_target
+       * field, which is abused to contain the FB write target and the
+       * EOT marker
+       */
+      emit_tex_op(c, WM_FB_WRITE,
+                 dst_undef(),
+                 target,
+                 0,
+                 outcolor,
+                 payload_r0_depth,
+                 outdepth);
    }
-   /* Set EOT flag on last inst:
-    */
-   inst->Aux |= 1; //eot
 }
 
 
+static struct brw_fp_dst translate_dst( struct brw_wm_compile *c,
+                                       const struct tgsi_full_dst_register *dst,
+                                       unsigned saturate )
+{
+   struct brw_fp_dst reg;   /* every field is filled in below */
+
+   reg.saturate = (saturate == TGSI_SAT_ZERO_ONE) ? 1 : 0;
+   reg.indirect = dst->DstRegister.Indirect;
+   reg.writemask = dst->DstRegister.WriteMask;
+   reg.index = dst->DstRegister.Index;
+   reg.file = dst->DstRegister.File;
+
+   if (reg.indirect) {   /* only ADDR[0]-relative writes are expected */
+      assert(dst->DstRegisterInd.File == TGSI_FILE_ADDRESS);
+      assert(dst->DstRegisterInd.Index == 0);
+   }
+
+   return reg;
+}
 
 
-/***********************************************************************
- * Emit INTERP instructions ahead of first use of each attrib.
- */
-
-static void validate_src_regs( struct brw_wm_compile *c,
-                              const struct prog_instruction *inst )
+static struct brw_fp_src translate_src( struct brw_wm_compile *c,
+                                       const struct tgsi_full_src_register *src )
 {
-   GLuint nr_args = brw_wm_nr_args( inst->Opcode );
-   GLuint i;
+   struct brw_fp_src reg;   /* every field is filled in below */
+
+   reg.indirect = src->SrcRegister.Indirect;
+   reg.index = src->SrcRegister.Index;
+   reg.file = src->SrcRegister.File;
+
+   reg.swizzle = ((src->SrcRegister.SwizzleW << 6) |
+                  (src->SrcRegister.SwizzleZ << 4) |
+                  (src->SrcRegister.SwizzleY << 2) |
+                  (src->SrcRegister.SwizzleX << 0));
+
+   switch (tgsi_util_get_full_src_register_sign_mode( src, 0 )) {
+   case TGSI_UTIL_SIGN_TOGGLE:      /* -x */
+      reg.negate = 1;
+      reg.abs = 0;
+      break;
+
 
-   for (i = 0; i < nr_args; i++) {
-      if (inst->SrcReg[i].File == TGSI_FILE_INPUT) {
-        GLuint idx = inst->SrcReg[i].Index;
-        if (!(c->fp_interp_emitted & (1<<idx))) {
-           emit_interp(c, idx);
-           c->fp_interp_emitted |= 1<<idx;
-        }
-      }
+   case TGSI_UTIL_SIGN_SET:         /* -|x| */
+      reg.negate = 1;
+      reg.abs = 1;
+      break;
+
+   case TGSI_UTIL_SIGN_CLEAR:       /* |x| */
+      reg.negate = 0;
+      reg.abs = 1;
+      break;
+
+   case TGSI_UTIL_SIGN_KEEP:        /* x */
+   default:
+      reg.negate = 0;
+      reg.abs = 0;
+      break;
    }
-}
-        
-static void validate_dst_regs( struct brw_wm_compile *c,
-                              const struct prog_instruction *inst )
-{
-   if (inst->DstReg.File == TGSI_FILE_OUTPUT) {
-      GLuint idx = inst->DstReg.Index;
-      if (idx == FRAG_RESULT_COLOR)
-         c->fp_fragcolor_emitted |= inst->DstReg.WriteMask;
+
+   if (reg.indirect) {   /* only ADDR[0]-relative reads are expected */
+      assert(src->SrcRegisterInd.File == TGSI_FILE_ADDRESS);
+      assert(src->SrcRegisterInd.Index == 0);
    }
+
+   return reg;
 }
 
 
@@ -674,59 +1030,78 @@ static void validate_dst_regs( struct brw_wm_compile *c,
 static void emit_insn( struct brw_wm_compile *c,
                       const struct tgsi_full_instruction *inst )
 {
-
-   switch (inst->Opcode) {
+   unsigned opcode = inst->Instruction.Opcode;
+   struct brw_fp_dst dst;
+   struct brw_fp_src src[3];
+   int i;
+
+   dst = translate_dst( c, &inst->FullDstRegisters[0],
+                       inst->Instruction.Saturate );
+
+   for (i = 0; i < inst->Instruction.NumSrcRegs; i++)
+      src[i] = translate_src( c, &inst->FullSrcRegisters[0] );
+   
+   switch (opcode) {
    case TGSI_OPCODE_ABS:
       emit_op1(c, TGSI_OPCODE_MOV,
               dst, 
-              brw_abs(src[0]));
+              src_abs(src[0]));
       break;
 
    case TGSI_OPCODE_SUB: 
       emit_op2(c, TGSI_OPCODE_ADD,
               dst,
               src[0],
-              brw_negate(src[1]));
+              src_negate(src[1]));
       break;
 
    case TGSI_OPCODE_SCS: 
       emit_op1(c, TGSI_OPCODE_SCS,
-              brw_writemask(dst, BRW_WRITEMASK_XY),
+              dst_mask(dst, BRW_WRITEMASK_XY),
               src[0]);
       break;
         
    case TGSI_OPCODE_DST:
-      precalc_dst(c, inst);
+      precalc_dst(c, dst, src[0], src[1]);
       break;
 
    case TGSI_OPCODE_LIT:
-      precalc_lit(c, inst);
+      precalc_lit(c, dst, src[0]);
       break;
 
    case TGSI_OPCODE_TEX:
-      precalc_tex(c, inst);
+      precalc_tex(c, dst,
+                 inst->InstructionExtTexture.Texture,
+                 src[0].file,  /* sampler unit */
+                 src[1] );
       break;
 
    case TGSI_OPCODE_TXP:
-      precalc_txp(c, inst);
+      precalc_txp(c, dst,
+                 inst->InstructionExtTexture.Texture,
+                 src[0].file,  /* sampler unit */
+                 src[1] );
       break;
 
    case TGSI_OPCODE_TXB:
-      out = emit_insn(c, inst);
-      out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
-      assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
+      /* XXX: TXB not done
+       */
+      precalc_tex(c, dst,
+                 inst->InstructionExtTexture.Texture,
+                 src[0].file,  /* sampler unit */
+                 src[1] );
       break;
 
    case TGSI_OPCODE_XPD: 
       emit_op2(c, TGSI_OPCODE_XPD,
-              brw_writemask(dst, BRW_WRITEMASK_XYZ),
+              dst_mask(dst, BRW_WRITEMASK_XYZ),
               src[0], 
               src[1]);
       break;
 
    case TGSI_OPCODE_KIL: 
       emit_op1(c, TGSI_OPCODE_KIL,
-              brw_writemask(dst_undef(), 0),
+              dst_mask(dst_undef(), 0),
               src[0]);
       break;
 
@@ -734,10 +1109,11 @@ static void emit_insn( struct brw_wm_compile *c,
       emit_fb_write(c);
       break;
    default:
-      if (brw_wm_is_scalar_result(inst->Opcode))
+      if (!c->key.has_flow_control &&
+         brw_wm_is_scalar_result(opcode))
         emit_scalar_insn(c, opcode, dst, src[0], src[1], src[2]);
       else
-        emit_op(c, opcode, dst, src[0], src[1], src[2]);
+        emit_op3(c, opcode, dst, src[0], src[1], src[2]);
       break;
    }
 }
@@ -746,46 +1122,70 @@ static void emit_insn( struct brw_wm_compile *c,
  * Initial pass for fragment program code generation.
  * This function is used by both the GLSL and non-GLSL paths.
  */
-void brw_wm_pass_fp( struct brw_wm_compile *c )
+int brw_wm_pass_fp( struct brw_wm_compile *c )
 {
-   struct brw_fragment_program *fp = c->fp;
-   GLuint insn;
+   struct brw_fragment_shader *fs = c->fp;
+   struct tgsi_parse_context parse;
+   struct tgsi_full_instruction *inst;
+   struct tgsi_full_declaration *decl;
+   const float *imm;
+   GLuint size;
+   GLuint i;
 
    if (BRW_DEBUG & DEBUG_WM) {
       debug_printf("pre-fp:\n");
-      tgsi_dump(fp->tokens, 0); 
+      tgsi_dump(fs->tokens, 0); 
    }
 
-   c->pixel_xy = brw_src_undef();
-   c->delta_xy = brw_src_undef();
-   c->pixel_w = brw_src_undef();
+   c->fp_pixel_xy = src_undef();
+   c->fp_delta_xy = src_undef();
+   c->fp_pixel_w = src_undef();
    c->nr_fp_insns = 0;
-   c->fp->tex_units_used = 0x0;
+   c->nr_immediates = 0;
 
 
    /* Loop over all instructions doing assorted simplifications and
     * transformations.
     */
-   tgsi_parse_init( &parse, tokens );
+   tgsi_parse_init( &parse, fs->tokens );
    while( !tgsi_parse_end_of_tokens( &parse ) ) {
       tgsi_parse_token( &parse );
 
       switch( parse.FullToken.Token.Type ) {
       case TGSI_TOKEN_TYPE_DECLARATION:
-        /* If branching shader, emit preamble instructions at decl time, as
-         * instruction order in the shader does not correspond to the order
-         * instructions are executed in the wild.
-         *
-         * This is where special instructions such as WM_CINTERP,
-         * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to compute
-         * shader inputs from varying vars.
+        /* Turn input declarations into special WM_* instructions.
          *
          * XXX: For non-branching shaders, consider deferring variable
          * initialization as late as possible to minimize register
          * usage.  This is how the original BRW driver worked.
+         *
+         * In a branching shader, we must emit the preamble
+         * instructions at decl time, as instruction order in the
+         * shader does not correspond to the order instructions are
+         * executed in the wild.
+         *
+         * This is where special instructions such as WM_CINTERP,
+         * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
+         * compute shader inputs from the payload registers and pixel
+         * position.
          */
-        validate_src_regs(c, inst);
-        validate_dst_regs(c, inst);
+         decl = &parse.FullToken.FullDeclaration;
+         if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+            unsigned first, last, mask;
+            unsigned attrib;
+
+            first = decl->DeclarationRange.First;
+            last = decl->DeclarationRange.Last;
+            mask = decl->Declaration.UsageMask;
+
+            for (attrib = first; attrib <= last; attrib++) {
+              emit_interp(c, 
+                          attrib, 
+                          decl->Semantic.SemanticName,
+                          decl->Declaration.Interpolate );
+            }
+         }
+        
          break;
 
       case TGSI_TOKEN_TYPE_IMMEDIATE:
@@ -795,21 +1195,36 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
          * float value per instruction.  Just save the data for now
          * and use directly later.
          */
+        imm = &parse.FullToken.FullImmediate.u[0].Float;
+        size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+
+        if (c->nr_immediates >= BRW_WM_MAX_CONST) {
+           tgsi_parse_free( &parse );
+           return PIPE_ERROR_OUT_OF_MEMORY;
+        }
+
+        for (i = 0; i < size; i++)
+           c->immediate[c->nr_immediates].v[i] = imm[i];
+        for (; i < 4; i++)
+           c->immediate[c->nr_immediates].v[i] = 0.0;
+        /* Claim the slot only now, and only once, after it is filled: */
+        c->immediate[c->nr_immediates].nr = size;
+        c->nr_immediates++;
         break;
 
       case TGSI_TOKEN_TYPE_INSTRUCTION:
          inst = &parse.FullToken.FullInstruction;
-        emit_insn( c, inst );
+        emit_insn(c, inst);
         break;
       }
    }
 
-   c->brw_program = brw_finalize( c->builder );
-
    if (BRW_DEBUG & DEBUG_WM) {
       debug_printf("pass_fp:\n");
-      brw_print_program( c->brw_program );
+      //brw_print_program( c->fp_brw_program );
       debug_printf("\n");
    }
+   tgsi_parse_free( &parse );
+   return c->error;
 }