965: get fragment shader compiler compiling
authorKeith Whitwell <keith@tungstengraphics.com>
Thu, 13 Dec 2007 20:38:56 +0000 (20:38 +0000)
committerKeith Whitwell <keith@tungstengraphics.com>
Thu, 13 Dec 2007 20:39:31 +0000 (20:39 +0000)
Don't think that it will run though.

32 files changed:
src/mesa/pipe/i915simple/i915_texture.c
src/mesa/pipe/i965simple/Makefile
src/mesa/pipe/i965simple/brw_batch.h
src/mesa/pipe/i965simple/brw_cc.c
src/mesa/pipe/i965simple/brw_clip_state.c
src/mesa/pipe/i965simple/brw_context.c
src/mesa/pipe/i965simple/brw_context.h
src/mesa/pipe/i965simple/brw_curbe.c
src/mesa/pipe/i965simple/brw_eu.h
src/mesa/pipe/i965simple/brw_eu_emit.c
src/mesa/pipe/i965simple/brw_gs_state.c
src/mesa/pipe/i965simple/brw_misc_state.c
src/mesa/pipe/i965simple/brw_sf.c
src/mesa/pipe/i965simple/brw_sf.h
src/mesa/pipe/i965simple/brw_sf_emit.c
src/mesa/pipe/i965simple/brw_sf_state.c
src/mesa/pipe/i965simple/brw_shader_info.c [new file with mode: 0644]
src/mesa/pipe/i965simple/brw_state.c
src/mesa/pipe/i965simple/brw_state.h
src/mesa/pipe/i965simple/brw_state_cache.c
src/mesa/pipe/i965simple/brw_state_pool.c
src/mesa/pipe/i965simple/brw_state_upload.c
src/mesa/pipe/i965simple/brw_tex_layout.c
src/mesa/pipe/i965simple/brw_vs_state.c
src/mesa/pipe/i965simple/brw_wm.c
src/mesa/pipe/i965simple/brw_wm.h
src/mesa/pipe/i965simple/brw_wm_decl.c [new file with mode: 0644]
src/mesa/pipe/i965simple/brw_wm_glsl.c
src/mesa/pipe/i965simple/brw_wm_sampler_state.c
src/mesa/pipe/i965simple/brw_wm_state.c
src/mesa/pipe/p_util.h
src/mesa/pipe/softpipe/sp_texture.c

index fefd105adf7a50c813165054c23cdb0a72aa80c0..44f72e63cc20ed236a30b8622ebd29dd4fbd92da 100644 (file)
@@ -47,10 +47,6 @@ static unsigned minify( unsigned d )
    return MAX2(1, d>>1);
 }
 
-static int align(int value, int alignment)
-{
-   return (value + alignment - 1) & ~(alignment - 1);
-}
 
 
 static void
index 21f40f72a0fff32fe8d029a15d80d9015c88263f..48c00ab50b80302aa9944bce92093598ed3625c3 100644 (file)
@@ -31,6 +31,7 @@ DRIVER_SOURCES = \
        brw_sf.c \
        brw_sf_emit.c \
        brw_sf_state.c \
+        brw_shader_info.c \
         brw_state.c \
        brw_state_batch.c \
        brw_state_cache.c \
@@ -44,6 +45,7 @@ DRIVER_SOURCES = \
        brw_vs_state.c \
        brw_wm.c \
        brw_wm_iz.c \
+       brw_wm_decl.c \
        brw_wm_glsl.c \
        brw_wm_sampler_state.c \
        brw_wm_state.c \
index 7c778f360bc1ac855f8b2e5ffd02d8d6343e5f7b..bef69ac871bde696ef398f04ed57d0b31e5ca262 100644 (file)
@@ -36,7 +36,7 @@
 #define INTEL_BATCH_CLIPRECTS    0x2
 
 #define BEGIN_BATCH( dwords, relocs ) \
-   (brw->batch_start = brw->winsys->batch_start(brw->winsys, dwords, relocs))
+   brw->winsys->batch_start(brw->winsys, dwords, relocs)
 
 #define OUT_BATCH( dword ) \
    brw->winsys->batch_dword(brw->winsys, dword)
@@ -50,7 +50,6 @@
  */
 #define FLUSH_BATCH(fence) do {                                \
    brw->winsys->batch_flush(brw->winsys, fence);       \
-   brw->batch_start = NULL;                            \
    brw->hardware_dirty = ~0;                           \
 } while (0)
 
index fc7fdba53fc8d69d044a4cb8d3fcc6b733714d14..6cc1505311e5bb3559abfe85e96ba3cb35d222db 100644 (file)
@@ -142,7 +142,7 @@ static void upload_cc_vp( struct brw_context *brw )
 
 const struct brw_tracked_state brw_cc_vp = {
    .dirty = {
-      .brw = BRW_NEW_CONTEXT,
+      .brw = BRW_NEW_SCENE,
       .cache = 0
    },
    .update = upload_cc_vp
index 51a4666a0bc60659e8255c2db9ac08464b38b6d1..ea5c05a2796a0cd79fc2927ef4a2ed0855ad14cd 100644 (file)
@@ -32,7 +32,7 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
-
+#include "pipe/p_util.h"
 
 
 static void upload_clip_unit( struct brw_context *brw )
@@ -43,7 +43,7 @@ static void upload_clip_unit( struct brw_context *brw )
 
    /* CACHE_NEW_CLIP_PROG */
    clip.thread0.grf_reg_count =
-      ALIGN(brw->clip.prog_data->total_grf, 16) / 16 - 1;
+      align(brw->clip.prog_data->total_grf, 16) / 16 - 1;
    clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6;
    clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length;
    clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length;
index e69ba6938ecab854039841fc99a0a866f4e2f6c4..5e58701e91cc7911e2ee05f99fbbd4134ca7119a 100644 (file)
@@ -237,7 +237,6 @@ struct pipe_context *brw_create(struct pipe_winsys *pipe_winsys,
    brw->pci_id = pci_id;
    brw->dirty = ~0;
    brw->hardware_dirty = ~0;
-   brw->batch_start = NULL;
 
    memset(&brw->wm.bind, ~0, sizeof(brw->wm.bind));
 
index 20528f00b38d6b71c6cefd26ca246e36ab598979..318c6a7049db18b956c461bec663eb9f1ae0b9d0 100644 (file)
  * Handles blending and (presumably) depth and stencil testing.
  */
 
-#define BRW_FALLBACK_TEXTURE            0x1
 #define BRW_MAX_CURBE                    (32*16)
 
 struct brw_context;
@@ -147,16 +146,13 @@ struct brw_winsys;
 /* Raised for other internal events:
  */
 #define BRW_NEW_URB_FENCE               0x10000
-#define BRW_NEW_INPUT_DIMENSIONS        0x20000
+#define BRW_NEW_PSP                     0x20000
 #define BRW_NEW_CURBE_OFFSETS           0x40000
 #define BRW_NEW_REDUCED_PRIMITIVE       0x80000
 #define BRW_NEW_PRIMITIVE               0x100000
-#define BRW_NEW_CONTEXT                 0x200000
-#define BRW_NEW_WM_INPUT_DIMENSIONS     0x400000
-#define BRW_NEW_INPUT_VARYING           0x800000
-#define BRW_NEW_PSP                     0x1000000
+#define BRW_NEW_SCENE                 0x200000
+#define BRW_NEW_SF_LINKAGE              0x400000
 
-#define ALIGN(value, alignment)  ((value + alignment - 1) & ~(alignment - 1))
 extern int BRW_DEBUG;
 
 #define DEBUG_TEXTURE  0x1
@@ -198,23 +194,47 @@ struct brw_state_flags {
    unsigned brw;
 };
 
+
+/* Per-shader summary kept alongside each vertex/fragment program.
+ * NOTE(review): nr_regs presumably holds a register count for each
+ * TGSI register file -- confirm against brw_shader_info.c, which is
+ * not visible in this hunk.
+ */
+struct brw_shader_info {
+   int nr_regs[8];             /* TGSI_FILE_* */
+};
+   
+
+
 struct brw_vertex_program {
    struct pipe_shader_state program;
-   unsigned id;
-   unsigned param_state;               /* flags indicating state tracked by params */
+   struct brw_shader_info info;
+   int id;
 };
 
 
 
 struct brw_fragment_program {
    struct pipe_shader_state program;
-   unsigned id;
-   unsigned param_state;               /* flags indicating state tracked by params */
+   struct brw_shader_info info;
+   
+   boolean UsesDepth;
    boolean UsesKill;
    boolean ComputesDepth;
+   int id;
 };
 
 
+
+
+/* Describes how each fragment-program input is fed from the
+ * vertex-program outputs.  Built by update_sf_linkage() by matching
+ * the semantic name/index of each fragment input declaration against
+ * the vertex-program declarations, and stored in brw->sf.linkage.
+ */
+struct pipe_setup_linkage {
+   struct {
+      /* Index of the vertex-program output whose semantic name and
+       * index match this input.  Initialised to ~0 (all five bits
+       * set) and left that way when no output matches.
+       */
+      unsigned vp_output:5;
+
+      /* Copied from the input declaration's Interpolation.Interpolate. */
+      unsigned interp_mode:4;
+
+      /* For inputs with COLOR semantic: index of the vertex-program
+       * output with BCOLOR semantic and the same semantic index.
+       * Initialised to ~0 and left that way otherwise.
+       */
+      unsigned bf_vp_output:5;
+   } fp_input[PIPE_MAX_SHADER_INPUTS];
+
+   /* Taken from the fragment program's input declarations. */
+   unsigned fp_input_count:5;
+
+   /* NOTE(review): only zero-initialised by the code visible here;
+    * presumably the highest vp_output referenced -- confirm.
+    */
+   unsigned max_vp_output:5;
+};
+   
+
+
 struct brw_texture {
    struct pipe_texture base;
 
@@ -248,6 +268,12 @@ struct brw_texture {
  * corresponding to a different brw_wm_prog_key struct, with different
  * compiled programs:
  */
+/* Data about a particular attempt to compile a program.  Note that
+ * there can be many of these, each in a different GL state
+ * corresponding to a different brw_wm_prog_key struct, with different
+ * compiled programs:
+ */
+
 struct brw_wm_prog_data {
    unsigned curb_read_length;
    unsigned urb_read_length;
@@ -256,13 +282,14 @@ struct brw_wm_prog_data {
    unsigned total_grf;
    unsigned total_scratch;
 
-   unsigned nr_params;
-   boolean error;
-
-   /* Pointer to tracked values (only valid once
-    * _mesa_load_state_parameters has been called at runtime).
+   /* Internally generated constants for the CURBE.  These are loaded
+    * ahead of the data from the constant buffer.
     */
-   const float *param[BRW_MAX_CURBE];
+   const float internal_const[8];
+   unsigned nr_internal_consts;
+   unsigned max_const;
+
+   boolean error;
 };
 
 struct brw_sf_prog_data {
@@ -298,19 +325,14 @@ struct brw_vs_prog_data {
 
    unsigned inputs_read;
 
+   unsigned max_const;
+
    /* Used for calculating urb partitions:
     */
    unsigned urb_entry_size;
 };
 
 
-/* Size == 0 if output either not written, or always [0,0,0,1]
- */
-struct brw_vs_ouput_sizes {
-   ubyte output_size[PIPE_MAX_SHADER_OUTPUTS];
-};
-
-
 #define BRW_MAX_TEX_UNIT 8
 #define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1
 
@@ -374,8 +396,6 @@ struct brw_cache {
 struct brw_tracked_state {
    struct brw_state_flags dirty;
    void (*update)( struct brw_context *brw );
-   void (*emit_reloc)( struct brw_context *brw );
-   boolean always_update;
 };
 
 
@@ -455,8 +475,6 @@ struct brw_context
 
    struct {
       struct brw_state_flags dirty;
-      struct brw_tracked_state **atoms;
-      unsigned nr_atoms;
    } state;
 
 
@@ -496,27 +514,16 @@ struct brw_context
 #define BRW_NR_UPLOAD_BUFS 17
 #define BRW_UPLOAD_INIT_SIZE (128*1024)
 
-      struct {
-        struct pipe_buffer_handle *vbo[BRW_NR_UPLOAD_BUFS];
-        unsigned buf;
-        unsigned offset;
-        unsigned size;
-        unsigned wrap;
-      } upload;
-
       /* Summary of size and varying of active arrays, so we can check
        * for changes to this state:
        */
       struct brw_vertex_info info;
-      int last_vb;
    } vb;
 
 
-   unsigned *batch_start;
    unsigned hardware_dirty;
    unsigned dirty;
    unsigned pci_id;
-
    /* BRW_NEW_URB_ALLOCATIONS:
     */
    struct {
@@ -557,11 +564,6 @@ struct brw_context
       unsigned vs_size;
       unsigned total_size;
 
-      /* Dynamic tracker which changes to reflect the state referenced
-       * by active fp and vp program parameters:
-       */
-      struct brw_tracked_state tracked_state;
-
       unsigned gs_offset;
 
       float *last_buf;
@@ -595,6 +597,8 @@ struct brw_context
    struct {
       struct brw_sf_prog_data *prog_data;
 
+      struct pipe_setup_linkage linkage;
+
       unsigned prog_gs_offset;
       unsigned vp_gs_offset;
       unsigned state_gs_offset;
@@ -602,11 +606,8 @@ struct brw_context
 
    struct {
       struct brw_wm_prog_data *prog_data;
-      struct brw_wm_compile *compile_data;
 
-      /* Input sizes, calculated from active vertex program:
-       */
-      unsigned input_size_masks[4];
+//      struct brw_wm_compiler *compile_data;
 
 
       /**
@@ -667,8 +668,6 @@ void brw_destroy_state(struct brw_context *brw);
  * brw_tex.c
  */
 void brwUpdateTextureState( struct brw_context *brw );
-void brw_FrameBufferTexInit( struct brw_context *brw );
-void brw_FrameBufferTexDestroy( struct brw_context *brw );
 
 
 /* brw_urb.c
index 0894e82d56f458805ea92b92e3766508e8632c80..b943a7af98cef1ade93ac3c05c7011453bdfdc23 100644 (file)
@@ -35,6 +35,9 @@
 #include "brw_defines.h"
 #include "brw_state.h"
 #include "brw_util.h"
+#include "brw_wm.h"
+#include "pipe/p_state.h"
+#include "pipe/p_util.h"
 
 #define FILE_DEBUG_FLAG DEBUG_FALLBACKS
 
 static void calculate_curbe_offsets( struct brw_context *brw )
 {
    /* CACHE_NEW_WM_PROG */
-   unsigned nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
+   unsigned nr_fp_regs = align(brw->wm.prog_data->max_const, 16);
 
    /* BRW_NEW_VERTEX_PROGRAM */
-   struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->attribs.VertexProgram;
-   unsigned nr_vp_regs = (vp->program.num_inputs * 4 + 15) / 16;
+   unsigned nr_vp_regs = align(brw->vs.prog_data->max_const, 16);
    unsigned nr_clip_regs = 0;
    unsigned total_regs;
 
@@ -55,7 +57,7 @@ static void calculate_curbe_offsets( struct brw_context *brw )
    /* BRW_NEW_CLIP ? */
    if (brw->attribs.Transform->ClipPlanesEnabled) {
       unsigned nr_planes = 6 + brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled);
-      nr_clip_regs = (nr_planes * 4 + 15) / 16;
+      nr_clip_regs = align(nr_planes * 4, 16);
    }
 #endif
 
@@ -172,28 +174,18 @@ static float fixed_plane[6][4] = {
    { 1,    0,    0, 1 }
 };
 
-#if 0
 /* Upload a new set of constants.  Too much variability to go into the
  * cache mechanism, but maybe would benefit from a comparison against
  * the current uploaded set of constants.
  */
 static void upload_constant_buffer(struct brw_context *brw)
 {
-   GLcontext *ctx = &brw->intel.ctx;
-   struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
-   struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program;
    struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL];
    unsigned sz = brw->curbe.total_size;
    unsigned bufsz = sz * 16 * sizeof(float);
    float *buf;
    unsigned i;
 
-   /* Update our own dependency flags.  This works because this
-    * function will also be called whenever fp or vp changes.
-    */
-   brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION);
-   brw->curbe.tracked_state.dirty.mesa |= vp->param_state;
-   brw->curbe.tracked_state.dirty.mesa |= fp->param_state;
 
    if (sz == 0) {
       struct brw_constant_buffer cb;
@@ -220,10 +212,16 @@ static void upload_constant_buffer(struct brw_context *brw)
    if (brw->curbe.wm_size) {
       unsigned offset = brw->curbe.wm_start * 16;
 
-      _mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
+      /* First the constant buffer constants:
+       */
+      
+      /* Then any internally generated constants: 
+       */
+      for (i = 0; i < brw->wm.prog_data->nr_internal_consts; i++)
+        buf[offset + i] = brw->wm.prog_data->internal_const[i];
 
-      for (i = 0; i < brw->wm.prog_data->nr_params; i++)
-        buf[offset + i] = brw->wm.prog_data->param[i][0];
+      assert(brw->wm.prog_data->max_const == 
+            brw->wm.prog_data->nr_internal_consts);
    }
 
 
@@ -243,34 +241,26 @@ static void upload_constant_buffer(struct brw_context *brw)
         buf[offset + i * 4 + 3] = fixed_plane[i][3];
       }
 
-      /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
-       * clip-space:
+      /* Clip planes: BRW_NEW_CLIP:
        */
-      assert(MAX_CLIP_PLANES == 6);
-      for (j = 0; j < MAX_CLIP_PLANES; j++) {
-        if (brw->attribs.Transform->ClipPlanesEnabled & (1<<j)) {
-           buf[offset + i * 4 + 0] = brw->attribs.Transform->_ClipUserPlane[j][0];
-           buf[offset + i * 4 + 1] = brw->attribs.Transform->_ClipUserPlane[j][1];
-           buf[offset + i * 4 + 2] = brw->attribs.Transform->_ClipUserPlane[j][2];
-           buf[offset + i * 4 + 3] = brw->attribs.Transform->_ClipUserPlane[j][3];
-           i++;
-        }
+      for (j = 0; j < brw->attribs.Clip.nr; j++) {
+        buf[offset + i * 4 + 0] = brw->attribs.Clip.ucp[j][0];
+        buf[offset + i * 4 + 1] = brw->attribs.Clip.ucp[j][1];
+        buf[offset + i * 4 + 2] = brw->attribs.Clip.ucp[j][2];
+        buf[offset + i * 4 + 3] = brw->attribs.Clip.ucp[j][3];
+        i++;
       }
    }
 
 
    if (brw->curbe.vs_size) {
-      unsigned offset = brw->curbe.vs_start * 16;
-      unsigned nr = vp->program.Base.Parameters->NumParameters;
+//      unsigned offset = brw->curbe.vs_start * 16;
+//      unsigned nr = vp->max_const;
 
-      _mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
+      /* map the vertex constant buffer and copy to curbe: */
 
-      for (i = 0; i < nr; i++) {
-        buf[offset + i * 4 + 0] = vp->program.Base.Parameters->ParameterValues[i][0];
-        buf[offset + i * 4 + 1] = vp->program.Base.Parameters->ParameterValues[i][1];
-        buf[offset + i * 4 + 2] = vp->program.Base.Parameters->ParameterValues[i][2];
-        buf[offset + i * 4 + 3] = vp->program.Base.Parameters->ParameterValues[i][3];
-      }
+//      assert(nr == 0);
+      assert(0);
    }
 
    if (0) {
@@ -309,7 +299,12 @@ static void upload_constant_buffer(struct brw_context *brw)
 
       /* Copy data to the buffer:
        */
-      dri_bo_subdata(pool->buffer, brw->curbe.gs_offset, bufsz, buf);
+      brw->winsys->buffer_subdata_typed(brw->winsys,
+                                       pool->buffer, 
+                                       brw->curbe.gs_offset, 
+                                       bufsz, 
+                                       buf,
+                                       BRW_CONSTANT_BUFFER );
    }
 
    /* TODO: only emit the constant_buffer packet when necessary, ie:
@@ -341,9 +336,7 @@ static void upload_constant_buffer(struct brw_context *brw)
        * flushes as necessary when doublebuffering of CURBEs isn't
        * possible.
        */
-/*       intel_batchbuffer_align(brw->intel.batch, 64, sizeof(cb)); */
       BRW_BATCH_STRUCT(brw, &cb);
-/*       intel_batchbuffer_align(brw->intel.batch, 64, 0); */
    }
 }
 
@@ -355,9 +348,8 @@ static void upload_constant_buffer(struct brw_context *brw)
  */
 const struct brw_tracked_state brw_constant_buffer = {
    .dirty = {
-      .mesa = (_NEW_TRANSFORM|_NEW_PROJECTION),      /* plus fp and vp flags */
-      .brw  = (BRW_NEW_FRAGMENT_PROGRAM |
-              BRW_NEW_VERTEX_PROGRAM |
+      .brw  = (BRW_NEW_CLIP |
+              BRW_NEW_CONSTANTS |
               BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
               BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
               BRW_NEW_CURBE_OFFSETS),
@@ -366,4 +358,3 @@ const struct brw_tracked_state brw_constant_buffer = {
    .update = upload_constant_buffer
 };
 
-#endif
index 111edb1506ff67f8c9089ddb73c58f9e8f34910c..23151ae9ed675a5577e88b03ea217017a1679398 100644 (file)
@@ -694,6 +694,17 @@ void brw_init_compile( struct brw_compile *p );
 const unsigned *brw_get_program( struct brw_compile *p, unsigned *sz );
 
 
+/* Generic ALU emitters taking the opcode as a runtime argument.
+ * These were previously file-static and are declared here so other
+ * files can call them.  Each obtains the next instruction slot for
+ * `opcode`, sets `dest` as its destination and returns the new
+ * instruction.  NOTE(review): the source operand(s) are presumably
+ * set the same way -- those lines fall outside the visible hunk.
+ */
+
+/* One-source form. */
+struct brw_instruction *brw_alu1( struct brw_compile *p,
+                                 unsigned opcode,
+                                 struct brw_reg dest,
+                                 struct brw_reg src );
+
+/* Two-source form. */
+struct brw_instruction *brw_alu2(struct brw_compile *p,
+                                unsigned opcode,
+                                struct brw_reg dest,
+                                struct brw_reg src0,
+                                struct brw_reg src1 );
+
 /* Helpers for regular instructions:
  */
 #define ALU1(OP)                                       \
index bda63e8b9a6ddfbf0c0dd2f81215c8ad8b67d91d..2423536dd19d6f8319b7b55bfe41594e481563ff 100644 (file)
@@ -363,10 +363,10 @@ static struct brw_instruction *next_insn( struct brw_compile *p,
 }
 
 
-static struct brw_instruction *brw_alu1( struct brw_compile *p,
-                                        unsigned opcode,
-                                        struct brw_reg dest,
-                                        struct brw_reg src )
+struct brw_instruction *brw_alu1( struct brw_compile *p,
+                                 unsigned opcode,
+                                 struct brw_reg dest,
+                                 struct brw_reg src )
 {
    struct brw_instruction *insn = next_insn(p, opcode);
    brw_set_dest(insn, dest);
@@ -374,11 +374,11 @@ static struct brw_instruction *brw_alu1( struct brw_compile *p,
    return insn;
 }
 
-static struct brw_instruction *brw_alu2(struct brw_compile *p,
-                                       unsigned opcode,
-                                       struct brw_reg dest,
-                                       struct brw_reg src0,
-                                       struct brw_reg src1 )
+struct brw_instruction *brw_alu2(struct brw_compile *p,
+                                unsigned opcode,
+                                struct brw_reg dest,
+                                struct brw_reg src0,
+                                struct brw_reg src1 )
 {
    struct brw_instruction *insn = next_insn(p, opcode);
    brw_set_dest(insn, dest);
index 8e62eb4bd708ef30c5452c46a311f7b7541a138a..3932e9e9394c1281ee9f8aa5e12363ef4bca48ae 100644 (file)
@@ -34,6 +34,7 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
+#include "pipe/p_util.h"
 
 
 
@@ -46,7 +47,7 @@ static void upload_gs_unit( struct brw_context *brw )
    /* CACHE_NEW_GS_PROG */
    if (brw->gs.prog_active) {
       gs.thread0.grf_reg_count =
-        ALIGN(brw->gs.prog_data->total_grf, 16) / 16 - 1;
+        align(brw->gs.prog_data->total_grf, 16) / 16 - 1;
       gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6;
       gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length;
    }
index 2ba4d3a9284abfa73b014a49e153b1e90e8196be..e600e9d8de5129ca1235445ac528c1c6b33fc504 100644 (file)
@@ -315,7 +315,7 @@ static void upload_pipe_control(struct brw_context *brw)
 
 const struct brw_tracked_state brw_pipe_control = {
    .dirty = {
-      .brw = BRW_NEW_CONTEXT,
+      .brw = BRW_NEW_SCENE,
       .cache = 0
    },
    .update = upload_pipe_control
@@ -380,7 +380,7 @@ static void upload_invarient_state( struct brw_context *brw )
 
 const struct brw_tracked_state brw_invarient_state = {
    .dirty = {
-      .brw = BRW_NEW_CONTEXT,
+      .brw = BRW_NEW_SCENE,
       .cache = 0
    },
    .update = upload_invarient_state
@@ -416,7 +416,7 @@ static void upload_state_base_address( struct brw_context *brw )
 
 const struct brw_tracked_state brw_state_base_address = {
    .dirty = {
-      .brw = BRW_NEW_CONTEXT,
+      .brw = BRW_NEW_SCENE,
       .cache = 0
    },
    .update = upload_state_base_address
index f009ff37d908a93482fa819e0db19bc40664911f..e7c02beda533cb9021958e6f5eece1471ee44fec 100644 (file)
@@ -36,9 +36,8 @@
 #include "brw_util.h"
 #include "brw_sf.h"
 #include "brw_state.h"
+#include "tgsi/util/tgsi_parse.h"
 
-#if 0
-#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1)
 
 static void compile_sf_prog( struct brw_context *brw,
                             struct brw_sf_prog_key *key )
@@ -46,7 +45,6 @@ static void compile_sf_prog( struct brw_context *brw,
    struct brw_sf_compile c;
    const unsigned *program;
    unsigned program_size;
-   unsigned i, idx;
 
    memset(&c, 0, sizeof(c));
 
@@ -55,27 +53,17 @@ static void compile_sf_prog( struct brw_context *brw,
    brw_init_compile(&c.func);
 
    c.key = *key;
-   c.nr_attrs = brw_count_bits(c.key.attrs);
+
+
+   c.nr_attrs = c.key.vp_output_count;
    c.nr_attr_regs = (c.nr_attrs+1)/2;
-   c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS);
+
+   c.nr_setup_attrs = c.key.fp_input_count;
    c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
 
    c.prog_data.urb_read_length = c.nr_attr_regs;
    c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
 
-   /* Construct map from attribute number to position in the vertex.
-    */
-   for (i = idx = 0; i < VERT_RESULT_MAX; i++)
-      if (c.key.attrs & (1<<i)) {
-        c.attr_to_idx[i] = idx;
-        c.idx_to_attr[idx] = i;
-        if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) {
-                c.point_attrs[i].CoordReplace =
-                       brw->attribs.Point->CoordReplace[i - VERT_RESULT_TEX0];
-        } else
-                c.point_attrs[i].CoordReplace = FALSE;
-        idx++;
-      }
 
    /* Which primitive?  Or all three?
     */
@@ -90,21 +78,17 @@ static void compile_sf_prog( struct brw_context *brw,
       break;
    case SF_POINTS:
       c.nr_verts = 1;
-      if (key->do_point_sprite)
-         brw_emit_point_sprite_setup( &c );
-      else
-         brw_emit_point_setup( &c );
+      brw_emit_point_setup( &c );
       break;
+
    case SF_UNFILLED_TRIS:
-      c.nr_verts = 3;
-      brw_emit_anyprim_setup( &c );
-      break;
    default:
       assert(0);
       return;
    }
 
 
+
    /* get the program
     */
    program = brw_get_program(&c.func, &program_size);
@@ -142,20 +126,15 @@ static void upload_sf_prog( struct brw_context *brw )
    /* Populate the key, noting state dependencies:
     */
    /* CACHE_NEW_VS_PROG */
-   key.attrs = brw->vs.prog_data->outputs_written;
+   key.vp_output_count = brw->vs.prog_data->outputs_written;
 
    /* BRW_NEW_REDUCED_PRIMITIVE */
    switch (brw->reduced_primitive) {
    case PIPE_PRIM_TRIANGLES:
-      /* NOTE: We just use the edgeflag attribute as an indicator that
-       * unfilled triangles are active.  We don't actually do the
-       * edgeflag testing here, it is already done in the clip
-       * program.
-       */
-      if (key.attrs & (1<<VERT_RESULT_EDGE))
-        key.primitive = SF_UNFILLED_TRIS;
-      else
-        key.primitive = SF_TRIANGLES;
+//      if (key.attrs & (1<<VERT_RESULT_EDGE))
+//      key.primitive = SF_UNFILLED_TRIS;
+//      else
+      key.primitive = SF_TRIANGLES;
       break;
    case PIPE_PRIM_LINES:
       key.primitive = SF_LINES;
@@ -165,16 +144,15 @@ static void upload_sf_prog( struct brw_context *brw )
       break;
    }
 
-   /* BRW_NEW_POINT */
-   key.do_point_sprite = brw->attribs.Point->PointSprite;
-   key.SpriteOrigin = brw->attribs.Point->SpriteOrigin;
-   /* BRW_NEW_RASTER */
-   key.do_flat_shading = (brw->attribs.Raster->flatshade);
-   key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide);
 
-   /* _NEW_POLYGON */
-   if (key.do_twoside_color)
-      key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW);
+//   key.do_point_sprite = brw->attribs.Point->PointSprite;
+//   key.SpriteOrigin = brw->attribs.Point->SpriteOrigin;
+
+//   key.do_flat_shading = (brw->attribs.Raster->flatshade);
+//   key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide);
+
+//   if (key.do_twoside_color)
+//      key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW);
 
 
    if (!search_cache(brw, &key))
@@ -184,11 +162,150 @@ static void upload_sf_prog( struct brw_context *brw )
 
 const struct brw_tracked_state brw_sf_prog = {
    .dirty = {
-      .brw   = (BRW_NEW_RASTER |
-               BRW_NEW_REDUCED_PRIMITIVE),
-      .cache = CACHE_NEW_VS_PROG
+      .brw   = (BRW_NEW_RASTERIZER |
+               BRW_NEW_REDUCED_PRIMITIVE |
+               BRW_NEW_VS |
+               BRW_NEW_FS),
+      .cache = 0,
    },
    .update = upload_sf_prog
 };
 
-#endif
+
+/* Build a struct like the one we'd like the state tracker to pass to
+ * us.
+ *
+ * Walks the leading declaration tokens of the bound fragment program
+ * (its inputs) and vertex program (its outputs), records the semantic
+ * name/index of every declared register, and then pairs each fragment
+ * input with the vertex output carrying the same semantic.  Inputs
+ * with COLOR semantic are additionally paired with the BCOLOR output
+ * of the same semantic index.  Inputs with no match keep the ~0
+ * sentinel written when the declaration was scanned.
+ *
+ * The result replaces brw->sf.linkage, and BRW_NEW_SF_LINKAGE is
+ * raised, only when it differs from the linkage already stored.
+ */
+static void update_sf_linkage( struct brw_context *brw )
+{
+   const struct brw_vertex_program *vs = brw->attribs.VertexProgram;
+   const struct brw_fragment_program *fs = brw->attribs.FragmentProgram;
+   struct pipe_setup_linkage state;
+   struct tgsi_parse_context parse;
+
+   int i, j;
+   int nr_vp_outputs = 0;
+   int done = 0;
+
+   struct {
+      unsigned semantic:8;
+      unsigned semantic_index:16;
+   } fp_semantic[32], vp_semantic[32];
+
+   /* Zero the whole struct (padding included) so that the memcmp()
+    * at the end compares like with like.
+    */
+   memset(&state, 0, sizeof(state));
+
+   /* Registers not covered by any declaration would otherwise be
+    * read uninitialised by the matching loop below.
+    */
+   memset(fp_semantic, 0, sizeof(fp_semantic));
+   memset(vp_semantic, 0, sizeof(vp_semantic));
+
+   /* First scan fp inputs.  Scanning stops at the first token that is
+    * not a declaration.
+    */
+   tgsi_parse_init( &parse, fs->program.tokens );
+   while( !done &&
+          !tgsi_parse_end_of_tokens( &parse ) )
+   {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT)
+         {
+            int first = parse.FullToken.FullDeclaration.u.DeclarationRange.First;
+            int last = parse.FullToken.FullDeclaration.u.DeclarationRange.Last;
+
+            /* Guard the 32-entry local semantic table. */
+            assert(last < 32);
+
+            /* The declaration range is inclusive of Last.
+             * NOTE(review): confirm against tgsi_parse.h / the TGSI
+             * interpreter, which iterate first..last inclusive.
+             */
+            for (i = first; i <= last; i++) {
+               state.fp_input[i].vp_output = ~0;
+               state.fp_input[i].bf_vp_output = ~0;
+               state.fp_input[i].interp_mode =
+                  parse.FullToken.FullDeclaration.Interpolation.Interpolate;
+
+               fp_semantic[i].semantic =
+                  parse.FullToken.FullDeclaration.Semantic.SemanticName;
+               fp_semantic[i].semantic_index =
+                  parse.FullToken.FullDeclaration.Semantic.SemanticIndex;
+            }
+
+            /* Declarations are expected in ascending register order;
+             * the count is one past the highest register seen.
+             */
+            assert(last >= (int) state.fp_input_count);
+            state.fp_input_count = last + 1;
+         }
+         break;
+      default:
+         done = 1;
+         break;
+      }
+   }
+
+
+   assert(state.fp_input_count == fs->program.num_inputs);
+
+
+   /* Then scan vp outputs.  Note the register file tested here is
+    * OUTPUT: it is the vertex program's outputs that feed the
+    * fragment program's inputs.
+    */
+   done = 0;
+   tgsi_parse_init( &parse, vs->program.tokens );
+   while( !done &&
+          !tgsi_parse_end_of_tokens( &parse ) )
+   {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT)
+         {
+            int first = parse.FullToken.FullDeclaration.u.DeclarationRange.First;
+            int last = parse.FullToken.FullDeclaration.u.DeclarationRange.Last;
+
+            /* Guard the 32-entry local semantic table. */
+            assert(last < 32);
+
+            for (i = first; i <= last; i++) {
+               vp_semantic[i].semantic =
+                  parse.FullToken.FullDeclaration.Semantic.SemanticName;
+               vp_semantic[i].semantic_index =
+                  parse.FullToken.FullDeclaration.Semantic.SemanticIndex;
+            }
+
+            assert(last >= nr_vp_outputs);
+            nr_vp_outputs = last + 1;
+         }
+         break;
+      default:
+         done = 1;
+         break;
+      }
+   }
+
+
+   /* Now match based on semantic information.  When several outputs
+    * share a semantic the highest-numbered one wins, as the inner
+    * loops do not stop at the first hit.
+    */
+   for (i = 0; i < state.fp_input_count; i++) {
+      for (j = 0; j < nr_vp_outputs; j++) {
+         if (fp_semantic[i].semantic == vp_semantic[j].semantic &&
+             fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) {
+            state.fp_input[i].vp_output = j;
+         }
+      }
+
+      /* Colour inputs also get a back-face source, if one exists. */
+      if (fp_semantic[i].semantic == TGSI_SEMANTIC_COLOR) {
+         for (j = 0; j < nr_vp_outputs; j++) {
+            if (TGSI_SEMANTIC_BCOLOR == vp_semantic[j].semantic &&
+                fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) {
+               state.fp_input[i].bf_vp_output = j;
+            }
+         }
+      }
+   }
+
+   /* Only flag dependants when the linkage actually changed. */
+   if (memcmp(&brw->sf.linkage, &state, sizeof(state)) != 0) {
+      brw->sf.linkage = state;
+      brw->state.dirty.brw |= BRW_NEW_SF_LINKAGE;
+   }
+}
+
+
+/* Tracked-state entry for the vertex->fragment linkage.  It names
+ * update_sf_linkage() as its update hook and lists BRW_NEW_VS and
+ * BRW_NEW_FS as the brw dirty flags it depends on; it depends on no
+ * cache flags.
+ */
+const struct brw_tracked_state brw_sf_linkage = {
+   .dirty = {
+      .brw   = (BRW_NEW_VS |
+               BRW_NEW_FS),
+      .cache = 0,
+   },
+   .update = update_sf_linkage
+};
+
index d04388325d231f1ffe3dd551d23b99523ad63650..b7ada4756047ac9381580a6b23b43921d93a8104 100644 (file)
 #define SF_TRIANGLES 2
 #define SF_UNFILLED_TRIS   3
 
+
+
 struct brw_sf_prog_key {
-   unsigned attrs:32;
+   unsigned vp_output_count:5;
+   unsigned fp_input_count:5;
+
    unsigned primitive:2;
    unsigned do_twoside_color:1;
    unsigned do_flat_shading:1;
    unsigned frontface_ccw:1;
    unsigned do_point_sprite:1;
-   unsigned pad:10;
-   int SpriteOrigin;
+
+   /* Interpolation masks;
+    */
+   unsigned linear_mask;
+   unsigned persp_mask;
+   unsigned const_mask;
+
+
+//   int SpriteOrigin;
 };
 
 struct brw_sf_point_tex {
index 93f23171f2e7656dc3971709a061a57b7029f718..834b5efdfef2bb515e3874fff2d8eeaa2172d3e8 100644 (file)
 #include "brw_util.h"
 #include "brw_sf.h"
 
-#if 0
-static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
-                                   struct brw_reg vert,
-                                   unsigned attr)
-{
-   unsigned off = c->attr_to_idx[attr] / 2;
-   unsigned sub = c->attr_to_idx[attr] % 2;
-
-   return brw_vec4_grf(vert.nr + off, sub * 4);
-}
-
-static boolean have_attr(struct brw_sf_compile *c,
-                          unsigned attr)
-{
-   return (c->key.attrs & (1<<attr)) ? 1 : 0;
-}
-
-
-
-/***********************************************************************
- * Twoside lighting
- */
-static void copy_bfc( struct brw_sf_compile *c,
-                     struct brw_reg vert )
-{
-   struct brw_compile *p = &c->func;
-   unsigned i;
-
-   for (i = 0; i < 2; i++) {
-      if (have_attr(c, VERT_RESULT_COL0+i) &&
-         have_attr(c, VERT_RESULT_BFC0+i))
-        brw_MOV(p,
-                get_vert_attr(c, vert, VERT_RESULT_COL0+i),
-                get_vert_attr(c, vert, VERT_RESULT_BFC0+i));
-   }
-}
-
-
-static void do_twoside_color( struct brw_sf_compile *c )
-{
-   struct brw_compile *p = &c->func;
-   struct brw_instruction *if_insn;
-   unsigned backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
-
-   /* Already done in clip program:
-    */
-   if (c->key.primitive == SF_UNFILLED_TRIS)
-      return;
-
-   /* XXX: What happens if BFC isn't present?  This could only happen
-    * for user-supplied vertex programs, as t_vp_build.c always does
-    * the right thing.
-    */
-   if (!(have_attr(c, VERT_RESULT_COL0) && have_attr(c, VERT_RESULT_BFC0)) &&
-       !(have_attr(c, VERT_RESULT_COL1) && have_attr(c, VERT_RESULT_BFC1)))
-      return;
-
-   /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
-    * to get all channels active inside the IF.  In the clipping code
-    * we run with NoMask, so it's not an option and we can use
-    * BRW_EXECUTE_1 for all comparisions.
-    */
-   brw_push_insn_state(p);
-   brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
-   if_insn = brw_IF(p, BRW_EXECUTE_4);
-   {
-      switch (c->nr_verts) {
-      case 3: copy_bfc(c, c->vert[2]);
-      case 2: copy_bfc(c, c->vert[1]);
-      case 1: copy_bfc(c, c->vert[0]);
-      }
-   }
-   brw_ENDIF(p, if_insn);
-   brw_pop_insn_state(p);
-}
-
-
-
-/***********************************************************************
- * Flat shading
- */
-
-#define VERT_RESULT_COLOR_BITS ((1<<VERT_RESULT_COL0) | \
-                                 (1<<VERT_RESULT_COL1))
-
-static void copy_colors( struct brw_sf_compile *c,
-                    struct brw_reg dst,
-                    struct brw_reg src)
-{
-   struct brw_compile *p = &c->func;
-   unsigned i;
-
-   for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) {
-      if (have_attr(c,i))
-        brw_MOV(p,
-                get_vert_attr(c, dst, i),
-                get_vert_attr(c, src, i));
-   }
-}
-
-
-
-/* Need to use a computed jump to copy flatshaded attributes as the
- * vertices are ordered according to y-coordinate before reaching this
- * point, so the PV could be anywhere.
- */
-static void do_flatshade_triangle( struct brw_sf_compile *c )
-{
-   struct brw_compile *p = &c->func;
-   struct brw_reg ip = brw_ip_reg();
-   unsigned nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
-   if (!nr)
-      return;
-
-   /* Already done in clip program:
-    */
-   if (c->key.primitive == SF_UNFILLED_TRIS)
-      return;
-
-   brw_push_insn_state(p);
-
-   brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr*2+1));
-   brw_JMPI(p, ip, ip, c->pv);
-
-   copy_colors(c, c->vert[1], c->vert[0]);
-   copy_colors(c, c->vert[2], c->vert[0]);
-   brw_JMPI(p, ip, ip, brw_imm_ud(nr*4+1));
-
-   copy_colors(c, c->vert[0], c->vert[1]);
-   copy_colors(c, c->vert[2], c->vert[1]);
-   brw_JMPI(p, ip, ip, brw_imm_ud(nr*2));
-
-   copy_colors(c, c->vert[0], c->vert[2]);
-   copy_colors(c, c->vert[1], c->vert[2]);
-
-   brw_pop_insn_state(p);
-}
-
-
-static void do_flatshade_line( struct brw_sf_compile *c )
-{
-   struct brw_compile *p = &c->func;
-   struct brw_reg ip = brw_ip_reg();
-   unsigned nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
-
-   if (!nr)
-      return;
-
-   /* Already done in clip program:
-    */
-   if (c->key.primitive == SF_UNFILLED_TRIS)
-      return;
-
-   brw_push_insn_state(p);
-
-   brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr+1));
-   brw_JMPI(p, ip, ip, c->pv);
-   copy_colors(c, c->vert[1], c->vert[0]);
-
-   brw_JMPI(p, ip, ip, brw_imm_ud(nr));
-   copy_colors(c, c->vert[0], c->vert[1]);
-
-   brw_pop_insn_state(p);
-}
-
 
 
 /***********************************************************************
@@ -277,9 +112,6 @@ static void copy_z_inv_w( struct brw_sf_compile *c )
 
 static void invert_det( struct brw_sf_compile *c)
 {
-   /* Looks like we invert all 8 elements just to get 1/det in
-    * position 2 !?!
-    */
    brw_math(&c->func,
            c->inv_det,
            BRW_MATH_FUNCTION_INV,
@@ -302,22 +134,16 @@ static boolean calculate_masks( struct brw_sf_compile *c,
                                  ushort *pc_linear)
 {
    boolean is_last_attr = (reg == c->nr_setup_regs - 1);
-   unsigned persp_mask = c->key.attrs & ~NON_PERPECTIVE_ATTRS;
-   unsigned linear_mask;
 
-   if (c->key.do_flat_shading)
-      linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1);
-   else
-      linear_mask = c->key.attrs;
 
    *pc_persp = 0;
    *pc_linear = 0;
    *pc = 0xf;
 
-   if (persp_mask & (1 << c->idx_to_attr[reg*2]))
-      *pc_persp = 0xf;
+//   if (persp_mask & (1 << c->idx_to_attr[reg*2]))
+//      *pc_persp = 0xf;
 
-   if (linear_mask & (1 << c->idx_to_attr[reg*2]))
+//   if (linear_mask & (1 << c->idx_to_attr[reg*2]))
       *pc_linear = 0xf;
 
    /* Maybe only processs one attribute on the final round:
@@ -325,10 +151,10 @@ static boolean calculate_masks( struct brw_sf_compile *c,
    if (reg*2+1 < c->nr_setup_attrs) {
       *pc |= 0xf0;
 
-      if (persp_mask & (1 << c->idx_to_attr[reg*2+1]))
-        *pc_persp |= 0xf0;
+//      if (persp_mask & (1 << c->idx_to_attr[reg*2+1]))
+//      *pc_persp |= 0xf0;
 
-      if (linear_mask & (1 << c->idx_to_attr[reg*2+1]))
+//      if (linear_mask & (1 << c->idx_to_attr[reg*2+1]))
         *pc_linear |= 0xf0;
    }
 
@@ -347,12 +173,6 @@ void brw_emit_tri_setup( struct brw_sf_compile *c )
    invert_det(c);
    copy_z_inv_w(c);
 
-   if (c->key.do_twoside_color)
-      do_twoside_color(c);
-
-   if (c->key.do_flat_shading)
-      do_flatshade_triangle(c);
-
 
    for (i = 0; i < c->nr_setup_regs; i++)
    {
@@ -433,9 +253,6 @@ void brw_emit_line_setup( struct brw_sf_compile *c )
    invert_det(c);
    copy_z_inv_w(c);
 
-   if (c->key.do_flat_shading)
-      do_flatshade_line(c);
-
    for (i = 0; i < c->nr_setup_regs; i++)
    {
       /* Pair of incoming attributes:
@@ -491,86 +308,6 @@ void brw_emit_line_setup( struct brw_sf_compile *c )
    }
 }
 
-void brw_emit_point_sprite_setup( struct brw_sf_compile *c )
-{
-   struct brw_compile *p = &c->func;
-   unsigned i;
-
-   c->nr_verts = 1;
-   alloc_regs(c);
-   copy_z_inv_w(c);
-   for (i = 0; i < c->nr_setup_regs; i++)
-   {
-      struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]];
-      struct brw_reg a0 = offset(c->vert[0], i);
-      ushort pc, pc_persp, pc_linear;
-      boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
-
-      if (pc_persp)
-      {
-         if (!tex->CoordReplace) {
-             brw_set_predicate_control_flag_value(p, pc_persp);
-             brw_MUL(p, a0, a0, c->inv_w[0]);
-         }
-      }
-
-      if (tex->CoordReplace) {
-         /* Caculate 1.0/PointWidth */
-         brw_math(&c->func,
-                 c->tmp,
-                 BRW_MATH_FUNCTION_INV,
-                 BRW_MATH_SATURATE_NONE,
-                 0,
-                 c->dx0,
-                 BRW_MATH_DATA_SCALAR,
-                 BRW_MATH_PRECISION_FULL);
-
-         if (c->key.SpriteOrigin == GL_UPPER_LEFT) {
-               brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
-               brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
-               brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0]));
-               brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
-         } else {
-               brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
-               brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
-               brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]);
-               brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
-         }
-      } else {
-         brw_MOV(p, c->m1Cx, brw_imm_ud(0));
-         brw_MOV(p, c->m2Cy, brw_imm_ud(0));
-      }
-
-      {
-        brw_set_predicate_control_flag_value(p, pc);
-        if (tex->CoordReplace) {
-            if (c->key.SpriteOrigin == GL_UPPER_LEFT) {
-                brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0));
-                brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0));
-            }
-            else
-                brw_MOV(p, c->m3C0, brw_imm_f(0.0));
-        } else {
-               brw_MOV(p, c->m3C0, a0); /* constant value */
-        }
-
-        /* Copy m0..m3 to URB.
-         */
-        brw_urb_WRITE(p,
-                      brw_null_reg(),
-                      0,
-                      brw_vec8_grf(0, 0),
-                      0,       /* allocate */
-                      1,       /* used */
-                      4,       /* msg len */
-                      0,       /* response len */
-                      last,    /* eot */
-                      last,    /* writes complete */
-                      i*4,     /* urb destination offset */
-                      BRW_URB_SWIZZLE_TRANSPOSE);
-      }
-   }
-}
 
 /* Points setup - several simplifications as all attributes are
  * constant across the face of the point (point sprites excluded!)
@@ -629,68 +366,3 @@ void brw_emit_point_setup( struct brw_sf_compile *c )
       }
    }
 }
-
-void brw_emit_anyprim_setup( struct brw_sf_compile *c )
-{
-   struct brw_compile *p = &c->func;
-   struct brw_reg ip = brw_ip_reg();
-   struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
-   struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
-   struct brw_reg primmask;
-   struct brw_instruction *jmp;
-   struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
-
-   alloc_regs(c);
-
-   primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
-
-   brw_MOV(p, primmask, brw_imm_ud(1));
-   brw_SHL(p, primmask, primmask, payload_prim);
-
-   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
-   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
-                                              (1<<_3DPRIM_TRISTRIP) |
-                                              (1<<_3DPRIM_TRIFAN) |
-                                              (1<<_3DPRIM_TRISTRIP_REVERSE) |
-                                              (1<<_3DPRIM_POLYGON) |
-                                              (1<<_3DPRIM_RECTLIST) |
-                                              (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
-   jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
-   {
-      brw_push_insn_state(p);
-      brw_emit_tri_setup( c );
-      brw_pop_insn_state(p);
-      /* note - thread killed in subroutine */
-   }
-   brw_land_fwd_jump(p, jmp);
-
-   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
-   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
-                                              (1<<_3DPRIM_LINESTRIP) |
-                                              (1<<_3DPRIM_LINELOOP) |
-                                              (1<<_3DPRIM_LINESTRIP_CONT) |
-                                              (1<<_3DPRIM_LINESTRIP_BF) |
-                                              (1<<_3DPRIM_LINESTRIP_CONT_BF)));
-   jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
-   {
-      brw_push_insn_state(p);
-      brw_emit_line_setup( c );
-      brw_pop_insn_state(p);
-      /* note - thread killed in subroutine */
-   }
-   brw_land_fwd_jump(p, jmp);
-
-   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
-   brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
-   jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
-   {
-      brw_push_insn_state(p);
-      brw_emit_point_sprite_setup( c );
-      brw_pop_insn_state(p);
-   }
-   brw_land_fwd_jump(p, jmp);
-
-   brw_emit_point_setup( c );
-}
-
-#endif
index 7b6ee215eb80d7f576f171e0571ee513a8c2273d..0de6e7240e87a7cc78044bc1e9f35413f8963c7c 100644 (file)
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
+#include "pipe/p_util.h"
 
-#if 0
 static void upload_sf_vp(struct brw_context *brw)
 {
-   GLcontext *ctx = &brw->intel.ctx;
    struct brw_sf_viewport sfv;
-   struct intel_renderbuffer *irb =
-      intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0][0]);
-   float y_scale, y_bias;
-   int x, y, w, h, x1, x2, y1, y2;
-   int draw_h = ctx->DrawBuffer->Height;
 
    memset(&sfv, 0, sizeof(sfv));
 
-   if (ctx->DrawBuffer->Name) {
-      /* User-created FBO */
-      if (irb && !irb->RenderToTexture) {
-        y_scale = -1.0;
-        y_bias = draw_h;
-      } else {
-        y_scale = 1.0;
-        y_bias = 0;
-      }
-   } else {
-      if (brw->intel.driDrawable != NULL) {
-        y_scale = -1.0;
-        y_bias = draw_h;
-      } else {
-        y_scale = 1.0;
-        y_bias = 0;
-      }
-   }
 
-   /* _NEW_VIEWPORT, BRW_NEW_METAOPS */
-
-   if (!brw->metaops.active) {
-      const float *v = brw->intel.ctx.Viewport._WindowMap.m;
-
-      sfv.viewport.m00 = v[MAT_SX];
-      sfv.viewport.m11 = v[MAT_SY] * y_scale;
-      sfv.viewport.m22 = v[MAT_SZ] * brw->intel.depth_scale;
-      sfv.viewport.m30 = v[MAT_TX];
-      sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias;
-      sfv.viewport.m32 = v[MAT_TZ] * brw->intel.depth_scale;
-   } else {
-      sfv.viewport.m00 =   1;
-      sfv.viewport.m11 = - 1;
-      sfv.viewport.m22 =   1;
-      sfv.viewport.m30 =   0;
-      sfv.viewport.m31 =   brw->intel.driDrawable->h;
-      sfv.viewport.m32 =   0;
-   }
+   /* BRW_NEW_VIEWPORT */
+   {
+      const float *scale = brw->attribs.Viewport.scale;
+      const float *trans = brw->attribs.Viewport.translate;
 
-   /* _NEW_SCISSOR */
-   x = brw->attribs.Scissor->X;
-   y = brw->attribs.Scissor->Y;
-   w = brw->attribs.Scissor->Width;
-   h = brw->attribs.Scissor->Height;
-
-   if (ctx->DrawBuffer->Name == 0) {
-      x1 = x;
-      y1 = draw_h - (y + h);
-      x2 = x + w - 1;
-      y2 = y1 + h - 1;
-   } else {
-      /* FBO has non-inverted coords. */
-      x1 = x;
-      y1 = y;
-      x2 = x + w - 1;
-      y2 = y + h - 1;
+      sfv.viewport.m00 = scale[0];
+      sfv.viewport.m11 = scale[1];
+      sfv.viewport.m22 = scale[2]; 
+      sfv.viewport.m30 = trans[0];
+      sfv.viewport.m31 = trans[1];
+      sfv.viewport.m32 = trans[2];
    }
 
-   sfv.scissor.xmin = CLAMP(x1, 0, ctx->DrawBuffer->Width - 1);
-   sfv.scissor.xmax = CLAMP(y1, 0, ctx->DrawBuffer->Height - 1);
-   sfv.scissor.ymin = CLAMP(x2, 0, ctx->DrawBuffer->Width - 1);
-   sfv.scissor.ymax = CLAMP(y2, 0, ctx->DrawBuffer->Height - 1);
+   /* BRW_NEW_SCISSOR */
+   sfv.scissor.xmin = brw->attribs.Scissor.minx;
+   sfv.scissor.xmax = brw->attribs.Scissor.maxx;
+   sfv.scissor.ymin = brw->attribs.Scissor.miny;
+   sfv.scissor.ymax = brw->attribs.Scissor.maxy;
 
    brw->sf.vp_gs_offset = brw_cache_data( &brw->cache[BRW_SF_VP], &sfv );
 }
 
 const struct brw_tracked_state brw_sf_vp = {
    .dirty = {
-      .mesa  = (_NEW_VIEWPORT |
-               _NEW_SCISSOR),
-      .brw   = BRW_NEW_METAOPS,
+      .brw   = (BRW_NEW_SCISSOR |
+               BRW_NEW_VIEWPORT),
       .cache = 0
    },
    .update = upload_sf_vp
@@ -130,7 +80,7 @@ static void upload_sf_unit( struct brw_context *brw )
    memset(&sf, 0, sizeof(sf));
 
    /* CACHE_NEW_SF_PROG */
-   sf.thread0.grf_reg_count = ALIGN(brw->sf.prog_data->total_grf, 16) / 16 - 1;
+   sf.thread0.grf_reg_count = align(brw->sf.prog_data->total_grf, 16) / 16 - 1;
    sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6;
    sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length;
 
@@ -151,19 +101,19 @@ static void upload_sf_unit( struct brw_context *brw )
 
    /* CACHE_NEW_SF_VP */
    sf.sf5.sf_viewport_state_offset = brw->sf.vp_gs_offset >> 5;
-
    sf.sf5.viewport_transform = 1;
 
-   /* _NEW_SCISSOR */
-   if (brw->attribs.Scissor->Enabled)
+   /* BRW_NEW_RASTERIZER */
+   if (brw->attribs.Raster->scissor)
       sf.sf6.scissor = 1;
 
-   /* _NEW_POLYGON */
+#if 0
    if (brw->attribs.Polygon->FrontFace == GL_CCW)
       sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
    else
       sf.sf5.front_winding = BRW_FRONTWINDING_CW;
 
+
    if (brw->attribs.Polygon->CullFlag) {
       switch (brw->attribs.Polygon->CullFaceMode) {
       case GL_FRONT:
@@ -182,25 +132,24 @@ static void upload_sf_unit( struct brw_context *brw )
    }
    else
       sf.sf6.cull_mode = BRW_CULLMODE_NONE;
+#else
+   sf.sf5.front_winding = BRW_FRONTWINDING_CW;
+   sf.sf6.cull_mode = BRW_CULLMODE_NONE;
+#endif
 
-
-   /* _NEW_LINE */
-   /* XXX use ctx->Const.Min/MaxLineWidth here */
-   sf.sf6.line_width = CLAMP(brw->attribs.Line->Width, 1.0, 5.0) * (1<<1);
+   sf.sf6.line_width = CLAMP(brw->attribs.Raster->line_width, 1.0, 5.0) * (1<<1);
 
    sf.sf6.line_endcap_aa_region_width = 1;
-   if (brw->attribs.Line->SmoothFlag)
+   if (brw->attribs.Raster->line_smooth)
       sf.sf6.aa_enable = 1;
    else if (sf.sf6.line_width <= 0x2)
        sf.sf6.line_width = 0;
 
-   /* _NEW_POINT */
    sf.sf6.point_rast_rule = 1; /* opengl conventions */
-   /* XXX clamp max depends on AA vs. non-AA */
 
-   sf.sf7.sprite_point = brw->attribs.Point->PointSprite;
-   sf.sf7.point_size = CLAMP(brw->attribs.Point->Size, 1.0, 255.0) * (1<<3);
-   sf.sf7.use_point_size_state = !brw->attribs.Point->_Attenuated;
+   sf.sf7.sprite_point = brw->attribs.Raster->point_sprite;
+   sf.sf7.point_size = CLAMP(brw->attribs.Raster->point_size, 1.0, 255.0) * (1<<3);   /* point size comes from point_size, not line_width; clamped, then scaled by 1<<3 */
+   sf.sf7.use_point_size_state = !brw->attribs.Raster->point_size_per_vertex;   /* use the state size only when no per-vertex size is supplied (same sense as the removed !_Attenuated) */
 
    /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
     */
@@ -220,12 +169,8 @@ static void upload_sf_unit( struct brw_context *brw )
 
 const struct brw_tracked_state brw_sf_unit = {
    .dirty = {
-      .mesa  = (_NEW_POLYGON |
-               _NEW_LINE |
-               _NEW_POINT |
-               _NEW_SCISSOR),
-      .brw   = (BRW_NEW_URB_FENCE |
-               BRW_NEW_METAOPS),
+      .brw   = (BRW_NEW_RASTERIZER |
+               BRW_NEW_URB_FENCE),
       .cache = (CACHE_NEW_SF_VP |
                CACHE_NEW_SF_PROG)
    },
@@ -233,4 +178,3 @@ const struct brw_tracked_state brw_sf_unit = {
 };
 
 
-#endif
diff --git a/src/mesa/pipe/i965simple/brw_shader_info.c b/src/mesa/pipe/i965simple/brw_shader_info.c
new file mode 100644 (file)
index 0000000..431b454
--- /dev/null
@@ -0,0 +1,49 @@
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/tgsi/util/tgsi_parse.h"
+
+
+// Walk the leading declaration tokens of 'tokens' and raise info->nr_regs[file]
+// to at least (range Last + 1) for each declared file; stops at the first other token.
+void brw_shader_info(const struct tgsi_token *tokens,
+                    struct brw_shader_info *info )
+{
+   struct tgsi_parse_context parse;
+   int done = 0;   // set by the first non-declaration token
+
+   tgsi_parse_init( &parse, tokens );
+
+   while( !done &&
+         !tgsi_parse_end_of_tokens( &parse ) ) 
+   {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+      {
+        const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
+        unsigned last = decl->u.DeclarationRange.Last;
+        // NOTE(review): 'last' is read from the range member before the assert below checks the declaration is a range.
+        assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
+      
+        // Broken by crazy wpos init:
+        //assert( info->nr_regs[decl->Declaration.File] <= last);
+        // nr_regs is only ever raised, never reset here. NOTE(review): presumably callers pass a zeroed 'info' - confirm.
+        info->nr_regs[decl->Declaration.File] = MAX2(info->nr_regs[decl->Declaration.File],
+                                                     last+1);
+        break;
+      }
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+      default:
+        done = 1;   // anything other than a declaration ends the scan
+        break;
+      }
+   }
+
+   tgsi_parse_free (&parse);
+   
+}
index f1eba146c4f590505e623042875350767b7daac9..032a4e02850308711afed6efb76844327e2e7487 100644 (file)
@@ -198,6 +198,10 @@ static void * brw_create_fs_state(struct pipe_context *pipe,
    /* XXX: Do I have to duplicate the tokens as well??
     */
    brw_fp->program = *shader;
+   brw_fp->id = brw_context(pipe)->program_id++;
+
+   brw_shader_info(shader->tokens,
+                  &brw_fp->info);
 
    return (void *)brw_fp;
 }
@@ -228,6 +232,9 @@ static void *brw_create_vs_state(struct pipe_context *pipe,
    /* XXX: Do I have to duplicate the tokens as well??
     */
    brw_vp->program = *shader;
+   brw_vp->id = brw_context(pipe)->program_id++;
+   brw_shader_info(shader->tokens,
+                  &brw_vp->info);
 
    tgsi_dump(shader->tokens, 0);
 
index 4dabfe808220669b10dae7fad400223a48fc3278..d09711f6f07c4db5d1d46ac0c7b12e839879149f 100644 (file)
@@ -154,4 +154,11 @@ void brw_upload_clip_prog(struct brw_context *brw);
 void brw_upload_blend_constant_color(struct brw_context *brw);
 void brw_upload_wm_samplers(struct brw_context *brw);
 
+/* brw_shader_info.c
+ */
+
+void brw_shader_info(const struct tgsi_token *tokens,
+                    struct brw_shader_info *info );
+
+
 #endif
index 13e262d2e55e59671cf5802367f39f22b0037455..c5738733f4e736ddfb73495b472c56f19251919a 100644 (file)
@@ -178,8 +178,9 @@ unsigned brw_upload_cache( struct brw_cache *cache,
 
    if (BRW_DEBUG & DEBUG_STATE)
       printf("upload %s: %d bytes to pool buffer %p offset %x\n",
-             cache->name, data_size,
-             cache->pool->buffer,
+             cache->name, 
+            data_size,
+             (void*)cache->pool->buffer,
              offset);
 
    /* Copy data to the buffer:
index a490049024d9cf6f2a9f097f2d6cb2c603cbfe7c..78268ed8f2e765d23d30eaf53dcdf84b9b4c384a 100644 (file)
  */
 
 #include "pipe/p_winsys.h"
+#include "pipe/p_util.h"
 #include "brw_context.h"
 #include "brw_state.h"
 
 boolean brw_pool_alloc( struct brw_mem_pool *pool,
                          unsigned size,
-                         unsigned align,
+                         unsigned alignment,
                          unsigned *offset_return)
 {
-   unsigned fixup = ALIGN(pool->offset, align) - pool->offset;
+   unsigned fixup = align(pool->offset, alignment) - pool->offset;
 
-   size = ALIGN(size, 4);
+   size = align(size, 4);
 
    if (pool->offset + fixup + size >= pool->size) {
       printf("%s failed\n", __FUNCTION__);
@@ -114,7 +115,7 @@ void brw_pool_check_wrap( struct brw_context *brw,
                          struct brw_mem_pool *pool )
 {
    if (pool->offset > (pool->size * 3) / 4) {
-      brw->state.dirty.brw |= BRW_NEW_CONTEXT;
+      brw->state.dirty.brw |= BRW_NEW_SCENE;
    }
 
 }
index 10f1d4812aa9c5ec229f42b6f3d58daa2de12421..1fb480172db57513f5a4e3c4c4774ffbe8acec77 100644 (file)
@@ -97,8 +97,6 @@ const struct brw_tracked_state *atoms[] =
 
 void brw_init_state( struct brw_context *brw )
 {
-   unsigned i;
-
    brw_init_pools(brw);
    brw_init_caches(brw);
 
@@ -156,7 +154,7 @@ void brw_validate_state( struct brw_context *brw )
        state->brw == 0)
       return;
 
-   if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
+   if (brw->state.dirty.brw & BRW_NEW_SCENE)
       brw_clear_batch_cache_flush(brw);
 
    if (BRW_DEBUG) {
index b9514be0c298bf47c7216532a101ab30d0416641..7d6e2851b17d0d60e08910e50d9e7b0f7a553f5f 100644 (file)
@@ -149,10 +149,10 @@ static void i945_miptree_layout_2d(struct brw_texture *tex)
       unsigned mip1_width;
 
       if (pt->compressed) {
-         mip1_width = ALIGN(minify(pt->width[0]), align_w)
-                      + ALIGN(minify(minify(pt->width[0])), align_w);
+         mip1_width = align(minify(pt->width[0]), align_w)
+                      + align(minify(minify(pt->width[0])), align_w);
       } else {
-         mip1_width = ALIGN(minify(pt->width[0]), align_w)
+         mip1_width = align(minify(pt->width[0]), align_w)
                       + minify(minify(pt->width[0]));
       }
 
@@ -164,7 +164,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex)
    /* Pitch must be a whole number of dwords, even though we
     * express it in texels.
     */
-   tex->pitch = ALIGN(tex->pitch * pt->cpp, 4) / pt->cpp;
+   tex->pitch = align(tex->pitch * pt->cpp, 4) / pt->cpp;
    tex->total_height = 0;
 
    for ( level = pt->first_level ; level <= pt->last_level ; level++ ) {
@@ -176,7 +176,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex)
       if (pt->compressed)
         img_height = MAX2(1, height/4);
       else
-        img_height = ALIGN(height, align_h);
+        img_height = align(height, align_h);
 
 
       /* Because the images are packed better, the final offset
@@ -187,7 +187,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex)
       /* Layout_below: step right after second mipmap.
        */
       if (level == pt->first_level + 1) {
-        x += ALIGN(width, align_w);
+        x += align(width, align_w);
       }
       else {
         y += img_height;
@@ -221,13 +221,13 @@ static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture
 #if 0
       if (pt->compressed) {
          align_w = intel_compressed_alignment(pt->internal_format);
-         pt->pitch = ALIGN(width, align_w);
+         pt->pitch = align(width, align_w);
          pack_y_pitch = (height + 3) / 4;
       } else
 #endif
       {
-         tex->pitch = ALIGN(pt->width[0] * pt->cpp, 4) / pt->cpp;
-         pack_y_pitch = ALIGN(pt->height[0], align_h);
+         tex->pitch = align(pt->width[0] * pt->cpp, 4) / pt->cpp;
+         pack_y_pitch = align(pt->height[0], align_h);
       }
 
       pack_x_pitch = tex->pitch;
@@ -262,8 +262,8 @@ static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture
          if (pt->compressed) {
             pack_y_pitch = (height + 3) / 4;
 
-            if (pack_x_pitch > ALIGN(width, align_w)) {
-               pack_x_pitch = ALIGN(width, align_w);
+            if (pack_x_pitch > align(width, align_w)) {
+               pack_x_pitch = align(width, align_w);
                pack_x_nr <<= 1;
             }
          } else {
@@ -275,7 +275,7 @@ static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture
 
             if (pack_y_pitch > 2) {
                pack_y_pitch >>= 1;
-               pack_y_pitch = ALIGN(pack_y_pitch, align_h);
+               pack_y_pitch = align(pack_y_pitch, align_h);
             }
          }
 
@@ -305,8 +305,6 @@ brw_texture_create(struct pipe_context *pipe, struct pipe_texture **pt)
                                      sizeof(struct brw_texture));
 
    if (tex) {
-      struct brw_context *brw = brw_context(pipe);
-
       memset(&tex->base + 1, 0,
             sizeof(struct brw_texture) - sizeof(struct pipe_texture));
 
index 7d6fb383b9e3aca13a3bcf54a7a378113d3308e9..c73469929ceb7b02c257f79048420cc639bf593c 100644 (file)
@@ -44,7 +44,7 @@ static void upload_vs_unit( struct brw_context *brw )
 
    /* CACHE_NEW_VS_PROG */
    vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6;
-   vs.thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1;
+   vs.thread0.grf_reg_count = align(brw->vs.prog_data->total_grf, 16) / 16 - 1;
    vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length;
    vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length;
    vs.thread3.dispatch_grf_start_reg = 1;
index f4e3166e1cfaf9c47d2d5b478d8d6d38b7759c39..f0a38d384bd5b180125ac9d9672b2c81963f2d24 100644 (file)
@@ -33,7 +33,9 @@
 #include "brw_context.h"
 #include "brw_util.h"
 #include "brw_wm.h"
+#include "brw_eu.h"
 #include "brw_state.h"
+#include "pipe/p_util.h"
 
 
 
@@ -41,24 +43,22 @@ static void do_wm_prog( struct brw_context *brw,
                        struct brw_fragment_program *fp,
                        struct brw_wm_prog_key *key)
 {
-   struct brw_wm_compile *c;
+   struct brw_wm_compile *c = CALLOC_STRUCT(brw_wm_compile);
    const unsigned *program;
    unsigned program_size;
 
-   c = brw->wm.compile_data;
-   if (c == NULL) {
-     brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data));
-     c = brw->wm.compile_data;
-   } else {
-     memset(c, 0, sizeof(*brw->wm.compile_data));
-   }
-   memcpy(&c->key, key, sizeof(*key));
-
+   c->key = *key;
    c->fp = fp;
+   
+   c->delta_xy[0] = brw_null_reg();
+   c->delta_xy[1] = brw_null_reg();
+   c->pixel_xy[0] = brw_null_reg();
+   c->pixel_xy[1] = brw_null_reg();
+   c->pixel_w = brw_null_reg();
+
 
    fprintf(stderr, "XXXXXXXX FP\n");
    
-
    brw_wm_glsl_emit(c);
 
    /* get the program
@@ -74,6 +74,8 @@ static void do_wm_prog( struct brw_context *brw,
                                              program_size,
                                              &c->prog_data,
                                              &brw->wm.prog_data );
+
+   FREE(c);
 }
 
 
@@ -86,8 +88,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
       (struct brw_fragment_program *)brw->attribs.FragmentProgram;
    unsigned lookup = 0;
    unsigned line_aa;
-   unsigned i;
-
+   
    memset(key, 0, sizeof(*key));
 
    /* Build the index for table lookup
@@ -204,7 +205,6 @@ static void brw_upload_wm_prog( struct brw_context *brw )
 const struct brw_tracked_state brw_wm_prog = {
    .dirty = {
       .brw   = (BRW_NEW_FS |
-               BRW_NEW_WM_INPUT_DIMENSIONS |
                BRW_NEW_REDUCED_PRIMITIVE),
       .cache = 0
    },
index a394e25da3a908bad89ebe1cf6ba5d20c11fa2c2..a1ac0f504a63f731410a5dcb8387f54bffe89192 100644 (file)
@@ -60,86 +60,19 @@ struct brw_wm_prog_key {
    unsigned aa_dest_stencil_reg:3;
    unsigned dest_depth_reg:3;
    unsigned nr_depth_regs:3;
-   unsigned projtex_mask:8;
    unsigned shadowtex_mask:8;
    unsigned computes_depth:1;  /* could be derived from program string */
    unsigned source_depth_to_render_target:1;
-   unsigned flat_shade:1;
    unsigned runtime_check_aads_emit:1;
-   
-   unsigned yuvtex_mask:8;
-   unsigned pad1:24;
-
-   unsigned program_string_id:32;
-};
-
-
-/* A bit of a glossary:
- *
- * brw_wm_value: A computed value or program input.  Values are
- * constant, they are created once and are never modified.  When a
- * fragment program register is written or overwritten, new values are
- * created fresh, preserving the rule that values are constant.
- *
- * brw_wm_ref: A reference to a value.  Wherever a value used is by an
- * instruction or as a program output, that is tracked with an
- * instance of this struct.  All references to a value occur after it
- * is created.  After the last reference, a value is dead and can be
- * discarded.
- *
- * brw_wm_grf: Represents a physical hardware register.  May be either
- * empty or hold a value.  Register allocation is the process of
- * assigning values to grf registers.  This occurs in pass2 and the
- * brw_wm_grf struct is not used before that.
- *
- * Fragment program registers: These are time-varying constructs that
- * are hard to reason about and which we translate away in pass0.  A
- * single fragment program register element (eg. temp[0].x) will be
- * translated to one or more brw_wm_value structs, one for each time
- * that temp[0].x is written to during the program. 
- */
-
-
 
-/* Used in pass2 to track register allocation.
- */
-struct brw_wm_grf {
-   struct brw_wm_value *value;
-   unsigned nextuse;
-};
+   unsigned yuvtex_mask:8;
 
-struct brw_wm_value {
-   struct brw_reg hw_reg;      /* emitted to this reg, may not always be there */
-   struct brw_wm_ref *lastuse;
-   struct brw_wm_grf *resident; 
-   unsigned contributes_to_output:1;
-   unsigned spill_slot:16;     /* if non-zero, spill immediately after calculation */
+   unsigned program_string_id;
 };
 
-struct brw_wm_ref {
-   struct brw_reg hw_reg;      /* nr filled in in pass2, everything else, pass0 */
-   struct brw_wm_value *value;
-   struct brw_wm_ref *prevuse;
-   unsigned unspill_reg:7;     /* unspill to reg */
-   unsigned emitted:1;
-   unsigned insn:24;
-};
 
-struct brw_wm_constref {
-   const struct brw_wm_ref *ref;
-   float constval;
-};
 
 
-struct brw_wm_instruction {
-   struct brw_wm_value *dst[4];
-   struct brw_wm_ref *src[3][4];
-   unsigned opcode:8;
-   unsigned saturate:1;
-   unsigned writemask:4;
-   unsigned tex_unit:4;   /* texture unit for TEX, TXD, TXP instructions */
-   unsigned tex_idx:3;    /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */
-};
 
 #define PROGRAM_INTERNAL_PARAM
 #define MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS 1024 /* 72 for GL_ARB_f_p */
@@ -151,124 +84,59 @@ struct brw_wm_instruction {
 #define BRW_WM_MAX_CONST 256
 #define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS
 
-
-
-/* New opcodes to track internal operations required for WM unit.
- * These are added early so that the registers used can be tracked,
- * freed and reused like those of other instructions.
- */
-#define WM_PIXELXY        (TGSI_OPCODE_LAST)
-#define WM_DELTAXY        (TGSI_OPCODE_LAST + 1)
-#define WM_PIXELW         (TGSI_OPCODE_LAST + 2)
-#define WM_LINTERP        (TGSI_OPCODE_LAST + 3)
-#define WM_PINTERP        (TGSI_OPCODE_LAST + 4)
-#define WM_CINTERP        (TGSI_OPCODE_LAST + 5)
-#define WM_WPOSXY         (TGSI_OPCODE_LAST + 6)
-#define WM_FB_WRITE       (TGSI_OPCODE_LAST + 7)
-#define MAX_WM_OPCODE     (TGSI_OPCODE_LAST + 8)
-
 #define PAYLOAD_DEPTH     (PIPE_ATTRIB_MAX)
 
+#define MAX_IFSN 32
+#define MAX_LOOP_DEPTH 32
+
 struct brw_wm_compile {
    struct brw_compile func;
    struct brw_wm_prog_key key;
-   struct brw_wm_prog_data prog_data;
+   struct brw_wm_prog_data prog_data; /* result */
 
    struct brw_fragment_program *fp;
 
-   float (*env_param)[4];
-
-   enum {
-      START,
-      PASS2_DONE
-   } state;
-
-   /* Initial pass - translate fp instructions to fp instructions,
-    * simplifying and adding instructions for interpolation and
-    * framebuffer writes.
-    */
-   const struct pipe_shader_state *prog_instructions;
-   unsigned nr_fp_insns;
-   unsigned fp_temp;
-   unsigned fp_interp_emitted;
-   unsigned fp_deriv_emitted;
-
-   struct tgsi_src_register pixel_xy;
-   struct tgsi_src_register delta_xy;
-   struct tgsi_src_register pixel_w;
-
-
-   struct brw_wm_value vreg[BRW_WM_MAX_VREG];
-   unsigned nr_vreg;
-
-   struct brw_wm_value creg[BRW_WM_MAX_PARAM];
-   unsigned nr_creg;
+   unsigned grf_limit;
+   unsigned max_wm_grf;
 
-   struct {
-      struct brw_wm_value depth[4]; /* includes r0/r1 */
-      struct brw_wm_value input_interp[PIPE_ATTRIB_MAX];
-   } payload;
 
+   struct brw_reg pixel_xy[2];
+   struct brw_reg delta_xy[2];
+   struct brw_reg pixel_w;
 
-   const struct brw_wm_ref *pass0_fp_reg[16][256][4];
 
-   struct brw_wm_ref undef_ref;
-   struct brw_wm_value undef_value;
+   struct brw_reg wm_regs[8][32][4];
 
-   struct brw_wm_ref refs[BRW_WM_MAX_REF];
-   unsigned nr_refs;
+   struct brw_reg payload_depth[4];
+   struct brw_reg payload_coef[16];
 
-   struct brw_wm_instruction instruction[BRW_WM_MAX_INSN];
-   unsigned nr_insns;
+   struct brw_reg emit_mask_reg;
 
-   struct brw_wm_constref constref[BRW_WM_MAX_CONST];
-   unsigned nr_constrefs;
+   struct brw_instruction *if_inst[MAX_IFSN];
+   int if_insn;
 
-   struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2];
+   struct brw_instruction *loop_inst[MAX_LOOP_DEPTH];
+   int loop_insn;
 
-   unsigned grf_limit;
-   unsigned max_wm_grf;
-   unsigned last_scratch;
+   struct brw_instruction *inst0;
+   struct brw_instruction *inst1;
 
-   struct {
-       boolean inited;
-       struct brw_reg reg;
-   } wm_regs[16][256][4];
    struct brw_reg stack;
-   struct brw_reg emit_mask_reg;
+   struct brw_indirect stack_index;
+
    unsigned reg_index;
+
+   unsigned tmp_start;
    unsigned tmp_index;
 };
 
 
-unsigned brw_wm_nr_args( unsigned opcode );
-unsigned brw_wm_is_scalar_result( unsigned opcode );
-
-void brw_wm_pass_fp( struct brw_wm_compile *c );
-void brw_wm_pass0( struct brw_wm_compile *c );
-void brw_wm_pass1( struct brw_wm_compile *c );
-void brw_wm_pass2( struct brw_wm_compile *c );
-void brw_wm_emit( struct brw_wm_compile *c );
-
-void brw_wm_print_value( struct brw_wm_compile *c,
-                        struct brw_wm_value *value );
-
-void brw_wm_print_ref( struct brw_wm_compile *c,
-                      struct brw_wm_ref *ref );
-
-void brw_wm_print_insn( struct brw_wm_compile *c,
-                       struct brw_wm_instruction *inst );
-
-void brw_wm_print_program( struct brw_wm_compile *c,
-                          const char *stage );
 
 void brw_wm_lookup_iz( unsigned line_aa,
                       unsigned lookup,
                       struct brw_wm_prog_key *key );
 
-#if 0
-boolean brw_wm_is_glsl(struct gl_fragment_program *fp);
 void brw_wm_glsl_emit(struct brw_wm_compile *c);
-#endif
+void brw_wm_emit_decls(struct brw_wm_compile *c);
 
 #endif
diff --git a/src/mesa/pipe/i965simple/brw_wm_decl.c b/src/mesa/pipe/i965simple/brw_wm_decl.c
new file mode 100644 (file)
index 0000000..392f17f
--- /dev/null
@@ -0,0 +1,377 @@
+
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/tgsi/util/tgsi_parse.h"
+
+/* Hand out the next scratch GRF above the statically allocated
+ * registers.
+ *
+ * tmp_index is bumped before use, so the register returned is
+ * (tmp_start + tmp_index) with tmp_index >= 1.  Temporaries stay
+ * allocated until release_tmps() rewinds tmp_index.
+ *
+ * reg_index is maintained as an absolute "next free GRF" number (see
+ * how prealloc_reg() advances it), so the high-water mark has to be
+ * computed from the absolute register number, not from the
+ * tmp_start-relative tmp_index.
+ */
+static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
+{
+   unsigned nr;
+
+   c->tmp_index++;
+   nr = c->tmp_start + c->tmp_index;
+
+   /* One past the highest register handed out so far: */
+   c->reg_index = MAX2(c->reg_index, nr + 1);
+
+   return brw_vec8_grf(nr, 0);
+}
+
+/* Give back every temporary handed out by alloc_tmp() by rewinding
+ * the allocation cursor; the next alloc_tmp() starts over from
+ * tmp_start + 1.
+ */
+static void release_tmps(struct brw_wm_compile *c)
+{
+   c->tmp_index = 0;
+}
+
+
+
+/* Non-zero when 'reg' names the null register of the architecture
+ * register file.
+ */
+static int is_null( struct brw_reg reg )
+{
+   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
+      return 0;
+
+   return reg.nr == BRW_ARF_NULL;
+}
+
+/* Compute the per-pixel X and Y values into two freshly allocated
+ * temporaries and remember them in c->pixel_xy[].  Only done once:
+ * returns immediately when c->pixel_xy[0] is already set.
+ */
+static void emit_pixel_xy( struct brw_wm_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tile_uw;
+   struct brw_reg x, y;
+
+   if (!is_null(c->pixel_xy[0]))
+      return;
+
+   tile_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
+
+   x = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW));
+   y = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW));
+
+   c->pixel_xy[0] = x;
+   c->pixel_xy[1] = y;
+
+   /* Pixel centers: add 1 or 0 to each of the micro-tile coordinates
+    * passed in r1.
+    */
+   brw_ADD(p, x,
+           stride(suboffset(tile_uw, 4), 2, 4, 0),
+           brw_imm_v(0x10101010));
+
+   brw_ADD(p, y,
+           stride(suboffset(tile_uw, 5), 2, 4, 0),
+           brw_imm_v(0x11001100));
+}
+
+
+
+
+
+
+/* Compute the X/Y deltas (pixel position minus the origin held in r1)
+ * into two temporaries stored in c->delta_xy[].  Only done once:
+ * returns immediately when c->delta_xy[0] is already set.
+ */
+static void emit_delta_xy( struct brw_wm_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg origin;
+   struct brw_reg pixel_x, pixel_y;
+
+   if (!is_null(c->delta_xy[0]))
+      return;
+
+   origin = brw_vec1_grf(1, 0);
+
+   /* The pixel coordinates have to exist before they can be offset: */
+   emit_pixel_xy(c);
+
+   c->delta_xy[0] = alloc_tmp(c);
+   c->delta_xy[1] = alloc_tmp(c);
+
+   pixel_x = retype(c->pixel_xy[0], BRW_REGISTER_TYPE_UW);
+   pixel_y = retype(c->pixel_xy[1], BRW_REGISTER_TYPE_UW);
+
+   /* delta = pixel center - origin, origin x in r1.0 and y in r1.1: */
+   brw_ADD(p, c->delta_xy[0], pixel_x, negate(origin));
+   brw_ADD(p, c->delta_xy[1], pixel_y, negate(suboffset(origin,1)));
+}
+
+
+
+/* Disabled (compiled out).
+ *
+ * Would compute a per-pixel value into c->pixel_w on first use: a
+ * LINE/MAC pair over the deltas writes into message register 2, and
+ * a math INV with msg_reg 2 then produces c->pixel_w.
+ *
+ * NOTE(review): relies on c->coef_wpos and c->pixel_w, neither of
+ * which is set up by the code visible in this file - confirm they
+ * exist before enabling.
+ */
+#if 0
+static void emit_pixel_w( struct brw_wm_compile *c )
+{
+   if (is_null(c->pixel_w)) {
+      struct brw_compile *p = &c->func;
+
+      struct brw_reg interp_wpos = c->coef_wpos;
+      
+      c->pixel_w = alloc_tmp(c);
+
+      emit_delta_xy(c);
+
+      /* Calc 1/w - just linterp wpos[3] optimized by putting the
+       * result straight into a message reg.
+       */
+      struct brw_reg interp3 = brw_vec1_grf(interp_wpos.nr+1, 4);
+      brw_LINE(p, brw_null_reg(), interp3, c->delta_xy[0]);
+      brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), c->delta_xy[1]);
+
+      /* Calc w */
+      brw_math_16( p, 
+                  c->pixel_w,
+                  BRW_MATH_FUNCTION_INV,
+                  BRW_MATH_SATURATE_NONE,
+                  2, 
+                  brw_null_reg(),
+                  BRW_MATH_PRECISION_FULL);
+   }
+}
+#endif
+
+
+/* Constant "interpolation" of input 'idx': for every channel enabled
+ * in 'mask', copy one element of that channel's coefficient block
+ * from the payload into the input's pre-allocated register.
+ */
+static void emit_cinterp(struct brw_wm_compile *c,
+                        int idx,
+                        int mask )
+{
+   struct brw_compile *p = &c->func;
+   const struct brw_reg coef = c->payload_coef[idx];
+   int chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_reg plane, dst;
+
+      if (!(mask & (1 << chan)))
+         continue;
+
+      /* Channels 0,1 sit in register coef.nr and channels 2,3 in
+       * coef.nr+1, at subregister 0 or 4 respectively.
+       */
+      plane = brw_vec1_grf(coef.nr + chan / 2, (chan & 1) * 4);
+      dst = c->wm_regs[TGSI_FILE_INPUT][idx][chan];
+
+      brw_MOV(p, dst, suboffset(plane, 3));
+   }
+}
+
+/* Linear interpolation of input 'idx': for every channel enabled in
+ * 'mask', emit a LINE/MAC pair combining that channel's coefficients
+ * with the X and Y deltas, writing the input's pre-allocated register.
+ */
+static void emit_linterp(struct brw_wm_compile *c,
+                        int idx,
+                        int mask )
+{
+   struct brw_compile *p = &c->func;
+   const struct brw_reg coef = c->payload_coef[idx];
+   int chan;
+
+   /* Make sure the deltas exist (emitted at most once): */
+   emit_delta_xy(c);
+
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_reg plane, dst;
+
+      if (!(mask & (1 << chan)))
+         continue;
+
+      /* Channels 0,1 sit in register coef.nr and channels 2,3 in
+       * coef.nr+1, at subregister 0 or 4 respectively.
+       */
+      plane = brw_vec1_grf(coef.nr + chan / 2, (chan & 1) * 4);
+      dst = c->wm_regs[TGSI_FILE_INPUT][idx][chan];
+
+      brw_LINE(p, brw_null_reg(), plane, c->delta_xy[0]);
+      brw_MAC(p, dst, suboffset(plane,1), c->delta_xy[1]);
+   }
+}
+
+/* Disabled (compiled out).
+ *
+ * Same LINE/MAC sequence as emit_linterp(), followed by a multiply of
+ * each result by c->pixel_w.
+ *
+ * NOTE(review): calls get_delta_xy(), get_pixel_w() and
+ * allocate_reg(), none of which are defined in the visible part of
+ * this file (the live helpers here are named emit_delta_xy() and
+ * emit_pixel_w()) - this needs updating before it can be enabled.
+ */
+#if 0
+static void emit_pinterp(struct brw_wm_compile *c,
+                        int idx,
+                        int mask )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg interp[4];
+   struct brw_reg coef = c->payload_coef[idx];
+   int i;
+
+   get_delta_xy(c);
+   get_pixel_w(c);
+
+   interp[0] = brw_vec1_grf(coef.nr, 0);
+   interp[1] = brw_vec1_grf(coef.nr, 4);
+   interp[2] = brw_vec1_grf(coef.nr+1, 0);
+   interp[3] = brw_vec1_grf(coef.nr+1, 4);
+
+   for(i = 0; i < 4; i++ ) {
+      if (mask & (1<<i)) {
+        struct brw_reg dst = allocate_reg(c, TGSI_FILE_INPUT, idx, i);
+        brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]);
+        brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]);
+        brw_MUL(p, dst, dst, c->pixel_w);
+      }
+   }
+}
+#endif
+
+
+
+/* Disabled (compiled out).
+ *
+ * Sketch of window-position handling: a WM_WPOSXY op for the XY
+ * channels followed by a WM_LINTERP op for ZW.
+ *
+ * NOTE(review): this cannot compile as written - the function takes
+ * no parameters yet uses 'c' and 'idx', and it relies on helpers
+ * (dst_reg, src_reg, emit_op, get_pixel_xy, ...) that are not defined
+ * in the visible part of this file.
+ */
+#if 0
+static void emit_wpos( )
+{ 
+   struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
+   struct tgsi_full_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
+   struct tgsi_full_src_register deltas = get_delta_xy(c);
+   struct tgsi_full_src_register arg2;
+   unsigned opcode;
+
+   opcode = WM_LINTERP;
+   arg2 = src_undef();
+
+   /* Have to treat wpos.xy specially:
+    */
+   emit_op(c,
+          WM_WPOSXY,
+          dst_mask(dst, WRITEMASK_XY),
+          0, 0, 0,
+          get_pixel_xy(c),
+          src_undef(),
+          src_undef());
+      
+   dst = dst_mask(dst, WRITEMASK_ZW);
+
+   /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
+    */
+   emit_op(c,
+          WM_LINTERP,
+          dst,
+          0, 0, 0,
+          interp,
+          deltas,
+          arg2);
+}
+#endif
+
+
+
+
+/* Perform register allocation:
+ *
+ *  -- r0???
+ *  -- passthrough depth regs (and stencil/aa??)
+ *  -- curbe ??
+ *  -- inputs (coefficients)
+ *
+ * Use a totally static register allocation.  This will perform poorly
+ * but is an easy way to get started (again).
+ *
+ * Registers are assigned strictly in the order of the statements
+ * below, with c->reg_index always naming the next free GRF:
+ * depth payload, constants, interpolation coefficients, the emit
+ * mask, the stack, then one register per channel of every input and
+ * output.  On return c->tmp_start marks where alloc_tmp() may start
+ * handing out temporaries.
+ *
+ * Only the CONSTANT, INPUT and OUTPUT entries of c->wm_regs are
+ * filled in here.
+ */
+static void prealloc_reg(struct brw_wm_compile *c)
+{
+   int i, j;
+   int nr_curbe_regs = 0;
+
+   /* R0, then some depth related regs:
+    * (two GRFs are reserved per depth payload entry)
+    */
+   for (i = 0; i < c->key.nr_depth_regs; i++) {
+      c->payload_depth[i] =  brw_vec8_grf(i*2, 0);
+      c->reg_index += 2;
+   }
+
+
+   /* Then a copy of our part of the CURBE entry:
+    */
+   {
+      int nr_constants = c->fp->info.nr_regs[TGSI_FILE_CONSTANT];
+      int index = 0;
+
+      c->prog_data.max_const = 4*nr_constants;
+      /* Each constant channel is a scalar, packed eight to a register: */
+      for (i = 0; i < nr_constants; i++) {
+        for (j = 0; j < 4; j++, index++) 
+           c->wm_regs[TGSI_FILE_CONSTANT][i][j] = brw_vec1_grf(c->reg_index + index/8,
+                                                               index%8);
+      }
+
+      /* Round the scalar count up to a multiple of 16, i.e. to a
+       * whole pair of registers:
+       */
+      nr_curbe_regs = 2*((4*nr_constants+15)/16);
+      c->reg_index += nr_curbe_regs;
+   }
+
+   /* Next we receive the plane coefficients for parameter
+    * interpolation:
+    * (two GRFs per input)
+    */
+   for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_INPUT]; i++) {
+      c->payload_coef[i] = brw_vec8_grf(c->reg_index, 0);
+      c->reg_index += 2;
+   }
+
+   /* NOTE(review): urb_read_length is derived from program.num_inputs
+    * while the coefficient registers above are counted with
+    * info.nr_regs[TGSI_FILE_INPUT] - confirm the two always agree.
+    */
+   c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
+   c->prog_data.urb_read_length = c->fp->program.num_inputs * 2;
+   c->prog_data.curb_read_length = nr_curbe_regs;
+
+   /* That's the end of the payload, now we can start allocating registers.
+    */
+   c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
+   c->reg_index++;
+
+   c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
+   c->reg_index += 2;
+
+   /* Now allocate room for the interpolated inputs and staging
+    * registers for the outputs:
+    * (one full register per channel)
+    */
+   for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_INPUT]; i++) 
+      for (j = 0; j < 4; j++)
+        c->wm_regs[TGSI_FILE_INPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 );
+
+   for (i = 0; i < c->fp->info.nr_regs[TGSI_FILE_OUTPUT]; i++) 
+      for (j = 0; j < 4; j++)
+        c->wm_regs[TGSI_FILE_OUTPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 );
+
+   /* Beyond this we should only need registers for internal temporaries:
+    */
+   c->tmp_start = c->reg_index;
+}
+
+
+
+
+
+/* Emit the shader preamble: statically allocate registers, then walk
+ * the leading declaration tokens of the program and interpolate every
+ * declared fragment program input up front.  (A more sophisticated
+ * compiler would interpolate on demand.)
+ *
+ * Parsing stops at the first token that is not a declaration.  All
+ * temporaries used by the preamble are released before returning.
+ */
+void brw_wm_emit_decls(struct brw_wm_compile *c)
+{
+   struct tgsi_parse_context parse;
+   int finished = 0;
+
+   prealloc_reg(c);
+
+   tgsi_parse_init( &parse, c->fp->program.tokens );
+
+   while (!finished) {
+      const struct tgsi_full_declaration *decl;
+      unsigned mode, mask, idx;
+
+      if (tgsi_parse_end_of_tokens( &parse ))
+         break;
+
+      tgsi_parse_token( &parse );
+
+      /* Immediates, instructions and anything else end the preamble: */
+      if (parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION) {
+         finished = 1;
+         continue;
+      }
+
+      decl = &parse.FullToken.FullDeclaration;
+
+      /* Only inputs need interpolating: */
+      if (decl->Declaration.File != TGSI_FILE_INPUT)
+         continue;
+
+      assert(decl->Declaration.Interpolate);
+
+      mask = decl->Declaration.UsageMask; /* ? */
+      mode = decl->Interpolation.Interpolate;
+
+      for (idx = decl->u.DeclarationRange.First;
+           idx <= decl->u.DeclarationRange.Last;
+           idx++) {
+         switch (mode) {
+         case TGSI_INTERPOLATE_CONSTANT:
+            emit_cinterp(c, idx, mask);
+            break;
+
+         case TGSI_INTERPOLATE_LINEAR:
+         case TGSI_INTERPOLATE_PERSPECTIVE:
+            /* Perspective falls back to linear for now:
+             * emit_pinterp(c, idx, mask) is not enabled.
+             */
+            emit_linterp(c, idx, mask);
+            break;
+
+         default:
+            break;
+         }
+      }
+   }
+
+   tgsi_parse_free (&parse);
+
+   release_tmps(c);
+}
index 90e73a605a6bd726f9c88dc87e6fa52a7b1d3c5c..d6dfaed8263146b017a4a8c8f0ce54dbadecaa8b 100644 (file)
 #include "brw_context.h"
 #include "brw_eu.h"
 #include "brw_wm.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/tgsi/util/tgsi_parse.h"
 
-#if 0
-
-/* Only guess, need a flag in gl_fragment_program later */
-boolean brw_wm_is_glsl(struct gl_fragment_program *fp)
-{
-    int i;
-    for (i = 0; i < fp->Base.NumInstructions; i++) {
-       struct prog_instruction *inst = &fp->Base.Instructions[i];
-       switch (inst->Opcode) {
-           case OPCODE_IF:
-           case OPCODE_INT:
-           case OPCODE_ENDIF:
-           case OPCODE_CAL:
-           case OPCODE_BRK:
-           case OPCODE_RET:
-           case OPCODE_DDX:
-           case OPCODE_DDY:
-           case OPCODE_BGNLOOP:
-               return TRUE;
-           default:
-               break;
-       }
-    }
-    return FALSE;
-}
 
-static void set_reg(struct brw_wm_compile *c, int file, int index,
-       int component, struct brw_reg reg)
-{
-    c->wm_regs[file][index][component].reg = reg;
-    c->wm_regs[file][index][component].inited = TRUE;
-}
 
-static int get_scalar_dst_index(struct prog_instruction *inst)
+/* Index (0..3) of the lowest channel enabled in the writemask of the
+ * instruction's first destination register, or 4 when no channel is
+ * enabled.
+ */
+static int get_scalar_dst_index(struct tgsi_full_instruction *inst)
 {
-    int i;
-    for (i = 0; i < 4; i++)
-       if (inst->DstReg.WriteMask & (1<<i))
-           break;
-    return i;
+   const struct tgsi_dst_register dst = inst->FullDstRegisters[0].DstRegister;
+   int chan = 0;
+
+   while (chan < 4 && !(dst.WriteMask & (1 << chan)))
+      chan++;
+
+   return chan;
 }
 
+/* Hand out the next scratch GRF above the statically allocated
+ * registers.
+ *
+ * tmp_index is bumped before use, so the register returned is
+ * (tmp_start + tmp_index) with tmp_index >= 1.  Temporaries stay
+ * allocated until release_tmps() rewinds tmp_index.
+ *
+ * reg_index is an absolute GRF number while tmp_index is relative to
+ * tmp_start, so the high-water mark has to be computed from the
+ * absolute register number.
+ */
 static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
 {
-    struct brw_reg reg;
-    reg = brw_vec8_grf(c->tmp_index--, 0);
-    return reg;
+   unsigned nr;
+
+   c->tmp_index++;
+   nr = c->tmp_start + c->tmp_index;
+
+   /* One past the highest register handed out so far: */
+   c->reg_index = MAX2(c->reg_index, nr + 1);
+
+   return brw_vec8_grf(nr, 0);
 }
 
+/* Give back every temporary handed out by alloc_tmp() by rewinding
+ * the allocation cursor.
+ */
 static void release_tmps(struct brw_wm_compile *c)
 {
-    c->tmp_index = 127;
+   c->tmp_index = 0;
 }
 
+
+/* Look up the hardware register backing one channel of a TGSI
+ * register.
+ *
+ * CONSTANT, INPUT, OUTPUT, TEMPORARY and ADDRESS files are read
+ * straight out of c->wm_regs.  TGSI_FILE_NULL yields the null
+ * register.  Every other file asserts and yields the null register.
+ */
 static struct brw_reg
-get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, unsigned neg, unsigned abs)
+get_reg(struct brw_wm_compile *c, int file, int index, int component )
 {
-    struct brw_reg reg;
-    switch (file) {
-       case PROGRAM_STATE_VAR:
-       case PROGRAM_CONSTANT:
-       case PROGRAM_UNIFORM:
-           file = PROGRAM_STATE_VAR;
-           break;
-       case PROGRAM_UNDEFINED:
-           return brw_null_reg();
-       default:
-           break;
-    }
-
-    if(c->wm_regs[file][index][component].inited)
-       reg = c->wm_regs[file][index][component].reg;
-    else
-       reg = brw_vec8_grf(c->reg_index, 0);
-
-    if(!c->wm_regs[file][index][component].inited) {
-       set_reg(c, file, index, component, reg);
-       c->reg_index++;
-    }
-
-    if (neg & (1<< component)) {
-       reg = negate(reg);
-    }
-    if (abs)
-       reg = brw_abs(reg);
-    return reg;
+   switch (file) {
+   case TGSI_FILE_CONSTANT:
+   case TGSI_FILE_INPUT:
+   case TGSI_FILE_OUTPUT:
+   case TGSI_FILE_TEMPORARY:
+   case TGSI_FILE_ADDRESS:
+      return c->wm_regs[file][index][component];
+
+   case TGSI_FILE_NULL:
+      break;
+
+   case TGSI_FILE_SAMPLER:     /* should never get here */
+   case TGSI_FILE_IMMEDIATE:   /* these need a different path */
+   default:
+      assert(0);
+      break;
+   }
+
+   return brw_null_reg();
 }
 
-static void prealloc_reg(struct brw_wm_compile *c)
-{
-    int i, j;
-    struct brw_reg reg;
-    int nr_interp_regs = 0;
-    unsigned inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted;
-
-    for (i = 0; i < 4; i++) {
-       reg = (i < c->key.nr_depth_regs)
-           ? brw_vec8_grf(i*2, 0) : brw_vec8_grf(0, 0);
-       set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
-    }
-    c->reg_index += 2*c->key.nr_depth_regs;
-    {
-       int nr_params = c->fp->program.Base.Parameters->NumParameters;
-       struct gl_program_parameter_list *plist =
-           c->fp->program.Base.Parameters;
-       int index = 0;
-       c->prog_data.nr_params = 4*nr_params;
-       for (i = 0; i < nr_params; i++) {
-           for (j = 0; j < 4; j++, index++) {
-               reg = brw_vec1_grf(c->reg_index + index/8,
-                       index%8);
-               c->prog_data.param[index] =
-                   &plist->ParameterValues[i][j];
-               set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
-           }
-       }
-       c->nr_creg = 2*((4*nr_params+15)/16);
-       c->reg_index += c->nr_creg;
-    }
-    for (i = 0; i < FRAG_ATTRIB_MAX; i++) {
-       if (inputs & (1<<i)) {
-           nr_interp_regs++;
-           reg = brw_vec8_grf(c->reg_index, 0);
-           for (j = 0; j < 4; j++)
-               set_reg(c, PROGRAM_PAYLOAD, i, j, reg);
-           c->reg_index += 2;
-
-       }
-    }
-    c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
-    c->prog_data.urb_read_length = nr_interp_regs * 2;
-    c->prog_data.curb_read_length = c->nr_creg;
-    c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
-    c->reg_index++;
-    c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0);
-    c->reg_index += 2;
-}
 
+/* Hardware register for channel 'component' of the instruction's
+ * first destination register.
+ */
 static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
-       struct prog_instruction *inst, int component, int nr)
-{
-    return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr,
-           0, 0);
-}
-
-static struct brw_reg get_src_reg(struct brw_wm_compile *c,
-       struct prog_src_register *src, int index, int nr)
+                                 struct tgsi_full_instruction *inst,
+                                 int component)
 {
-    int component = GET_SWZ(src->Swizzle, index);
-    return get_reg(c, src->File, src->Index, component, nr,
-           src->NegateBase, src->Abs);
+   const struct tgsi_dst_register *dst = &inst->FullDstRegisters[0].DstRegister;
+
+   return get_reg(c, dst->File, dst->Index, component);
 }
 
-static void emit_abs( struct brw_wm_compile *c,
-               struct prog_instruction *inst)
-{
-    int i;
-    struct brw_compile *p = &c->func;
-    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
-    for (i = 0; i < 4; i++) {
-       if (inst->DstReg.WriteMask & (1<<i)) {
-           struct brw_reg src, dst;
-           dst = get_dst_reg(c, inst, i, 1);
-           src = get_src_reg(c, &inst->SrcReg[0], i, 1);
-           brw_MOV(p, dst, brw_abs(src));
-       }
-    }
-    brw_set_saturate(p, 0);
-}
-
-static void emit_int( struct brw_wm_compile *c,
-               struct prog_instruction *inst)
-{
-    int i;
-    struct brw_compile *p = &c->func;
-    unsigned mask = inst->DstReg.WriteMask;
-    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
-    for (i = 0; i < 4; i++) {
-       if (mask & (1<<i)) {
-           struct brw_reg src, dst;
-           dst = get_dst_reg(c, inst, i, 1) ;
-           src = get_src_reg(c, &inst->SrcReg[0], i, 1);
-           brw_RNDD(p, dst, src);
-       }
-    }
-    brw_set_saturate(p, 0);
-}
-
-static void emit_mov( struct brw_wm_compile *c,
-               struct prog_instruction *inst)
-{
-    int i;
-    struct brw_compile *p = &c->func;
-    unsigned mask = inst->DstReg.WriteMask;
-    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
-    for (i = 0; i < 4; i++) {
-       if (mask & (1<<i)) {
-           struct brw_reg src, dst;
-           dst = get_dst_reg(c, inst, i, 1);
-           src = get_src_reg(c, &inst->SrcReg[0], i, 1);
-           brw_MOV(p, dst, src);
-       }
-    }
-    brw_set_saturate(p, 0);
-}
-
-static void emit_pixel_xy(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
-{
-    struct brw_reg r1 = brw_vec1_grf(1, 0);
-    struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
-
-    struct brw_reg dst0, dst1;
-    struct brw_compile *p = &c->func;
-    unsigned mask = inst->DstReg.WriteMask;
-
-    dst0 = get_dst_reg(c, inst, 0, 1);
-    dst1 = get_dst_reg(c, inst, 1, 1);
-    /* Calculate pixel centers by adding 1 or 0 to each of the
-     * micro-tile coordinates passed in r1.
-     */
-    if (mask & WRITEMASK_X) {
-       brw_ADD(p,
-               vec8(retype(dst0, BRW_REGISTER_TYPE_UW)),
-               stride(suboffset(r1_uw, 4), 2, 4, 0),
-               brw_imm_v(0x10101010));
-    }
-
-    if (mask & WRITEMASK_Y) {
-       brw_ADD(p,
-               vec8(retype(dst1, BRW_REGISTER_TYPE_UW)),
-               stride(suboffset(r1_uw, 5), 2, 4, 0),
-               brw_imm_v(0x11001100));
-    }
-
-}
-
-static void emit_delta_xy(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
+/* Swizzle selector stored in 'src' for channel (index & 3). */
+static int get_swz( struct tgsi_src_register src, int index )
 {
-    struct brw_reg r1 = brw_vec1_grf(1, 0);
-    struct brw_reg dst0, dst1, src0, src1;
-    struct brw_compile *p = &c->func;
-    unsigned mask = inst->DstReg.WriteMask;
-
-    dst0 = get_dst_reg(c, inst, 0, 1);
-    dst1 = get_dst_reg(c, inst, 1, 1);
-    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
-    src1 = get_src_reg(c, &inst->SrcReg[0], 1, 1);
-    /* Calc delta X,Y by subtracting origin in r1 from the pixel
-     * centers.
-     */
-    if (mask & WRITEMASK_X) {
-       brw_ADD(p,
-               dst0,
-               retype(src0, BRW_REGISTER_TYPE_UW),
-               negate(r1));
-    }
-
-    if (mask & WRITEMASK_Y) {
-       brw_ADD(p,
-               dst1,
-               retype(src1, BRW_REGISTER_TYPE_UW),
-               negate(suboffset(r1,1)));
-
-    }
-
+   const int chan = index & 3;
+
+   if (chan == 0)
+      return src.SwizzleX;
+   if (chan == 1)
+      return src.SwizzleY;
+   if (chan == 2)
+      return src.SwizzleZ;
+
+   return src.SwizzleW;
 }
 
-
-static void fire_fb_write( struct brw_wm_compile *c,
-                           unsigned base_reg,
-                           unsigned nr )
+/* Extended swizzle selector stored in 'src' for channel (index & 3). */
+static int get_ext_swz( struct tgsi_src_register_ext_swz src, int index )
 {
-    struct brw_compile *p = &c->func;
-
-    /* Pass through control information:
-     */
-    /*  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */
-    {
-       brw_push_insn_state(p);
-       brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
-       brw_MOV(p,
-               brw_message_reg(base_reg + 1),
-               brw_vec8_grf(1, 0));
-       brw_pop_insn_state(p);
-    }
-    /* Send framebuffer write message: */
-    brw_fb_WRITE(p,
-           retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
-           base_reg,
-           retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
-           0,              /* render surface always 0 */
-           nr,
-           0,
-           1);
+   const int chan = index & 3;
+
+   if (chan == 0)
+      return src.ExtSwizzleX;
+   if (chan == 1)
+      return src.ExtSwizzleY;
+   if (chan == 2)
+      return src.ExtSwizzleZ;
+
+   return src.ExtSwizzleW;
 }
 
-static void emit_fb_write(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
-{
-    struct brw_compile *p = &c->func;
-    int nr = 2;
-    int channel;
-    struct brw_reg src0;//, src1, src2, dst;
-
-    /* Reserve a space for AA - may not be needed:
-     */
-    if (c->key.aa_dest_stencil_reg)
-       nr += 1;
-    {
-       brw_push_insn_state(p);
-       for (channel = 0; channel < 4; channel++) {
-           src0 = get_src_reg(c,  &inst->SrcReg[0], channel, 1);
-           /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
-           /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
-           brw_MOV(p, brw_message_reg(nr + channel), src0);
-       }
-       /* skip over the regs populated above: */
-       nr += 8;
-       brw_pop_insn_state(p);
-    }
-    fire_fb_write(c, 0, nr);
-}
-
-static void emit_pixel_w( struct brw_wm_compile *c,
-               struct prog_instruction *inst)
-{
-    struct brw_compile *p = &c->func;
-    unsigned mask = inst->DstReg.WriteMask;
-    if (mask & WRITEMASK_W) {
-       struct brw_reg dst, src0, delta0, delta1;
-       struct brw_reg interp3;
-
-       dst = get_dst_reg(c, inst, 3, 1);
-       src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
-       delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
-       delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
-
-       interp3 = brw_vec1_grf(src0.nr+1, 4);
-       /* Calc 1/w - just linterp wpos[3] optimized by putting the
-        * result straight into a message reg.
-        */
-       brw_LINE(p, brw_null_reg(), interp3, delta0);
-       brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1);
-
-       /* Calc w */
-       brw_math_16( p, dst,
-               BRW_MATH_FUNCTION_INV,
-               BRW_MATH_SATURATE_NONE,
-               2, brw_null_reg(),
-               BRW_MATH_PRECISION_FULL);
-    }
-}
-
-static void emit_linterp(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
+/* Build the hardware source operand for channel 'index' of a TGSI
+ * source register.
+ *
+ * The channel is first mapped through the plain swizzle, then through
+ * the extended swizzle.  The register-level negate and the per-channel
+ * extended-swizzle negate are combined (xor).  An extended swizzle of
+ * ZERO or ONE produces a float immediate instead of a register.
+ *
+ * Unsupported modifiers (divide, indirect, dimension, complement,
+ * bias, scale2x, post-absolute negate) are asserted to be absent.  On
+ * an unrecognised extended swizzle the function asserts and returns
+ * the null register, matching what get_reg() does for files it cannot
+ * handle, rather than returning an uninitialised register when
+ * asserts are compiled out.
+ */
+static struct brw_reg get_src_reg(struct brw_wm_compile *c,
+                                 struct tgsi_full_src_register *src,
+                                 int index)
 {
-    struct brw_compile *p = &c->func;
-    unsigned mask = inst->DstReg.WriteMask;
-    struct brw_reg interp[4];
-    struct brw_reg dst, delta0, delta1;
-    struct brw_reg src0;
-
-    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
-    delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
-    delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
-    unsigned nr = src0.nr;
-    int i;
-
-    interp[0] = brw_vec1_grf(nr, 0);
-    interp[1] = brw_vec1_grf(nr, 4);
-    interp[2] = brw_vec1_grf(nr+1, 0);
-    interp[3] = brw_vec1_grf(nr+1, 4);
-
-    for(i = 0; i < 4; i++ ) {
-       if (mask & (1<<i)) {
-           dst = get_dst_reg(c, inst, i, 1);
-           brw_LINE(p, brw_null_reg(), interp[i], delta0);
-           brw_MAC(p, dst, suboffset(interp[i],1), delta1);
-       }
-    }
+   struct brw_reg reg;
+   int component = index;
+   int neg = 0;
+   int abs = 0;
+
+   if (src->SrcRegister.Negate)
+      neg = 1;
+
+   component = get_swz(src->SrcRegister, component);
+
+   /* Yes, there are multiple negates:
+    */
+   switch (component & 3) {
+   case 0: neg ^= src->SrcRegisterExtSwz.NegateX; break;
+   case 1: neg ^= src->SrcRegisterExtSwz.NegateY; break;
+   case 2: neg ^= src->SrcRegisterExtSwz.NegateZ; break;
+   case 3: neg ^= src->SrcRegisterExtSwz.NegateW; break;
+   }
+
+   /* And multiple swizzles, fun isn't it:
+    */
+   component = get_ext_swz(src->SrcRegisterExtSwz, component);
+
+   /* Can't handle this, don't know if we need to:
+    */
+   assert(src->SrcRegisterExtSwz.ExtDivide == 0);
+
+   /* Not handling indirect lookups yet:
+    */
+   assert(src->SrcRegister.Indirect == 0);
+
+   /* Don't know what dimension means:
+    */
+   assert(src->SrcRegister.Dimension == 0);
+
+   /* Will never handle any of this stuff:
+    */
+   assert(src->SrcRegisterExtMod.Complement == 0);
+   assert(src->SrcRegisterExtMod.Bias == 0);
+   assert(src->SrcRegisterExtMod.Scale2X == 0);
+
+   if (src->SrcRegisterExtMod.Absolute)
+      abs = 1;
+
+   /* Another negate!  This is a post-absolute negate, which we
+    * don't handle here.  Need to clean the crap out of tgsi somehow.
+    */
+   assert(src->SrcRegisterExtMod.Negate == 0);
+
+   switch( component ) {
+   case TGSI_EXTSWIZZLE_X:
+   case TGSI_EXTSWIZZLE_Y:
+   case TGSI_EXTSWIZZLE_Z:
+   case TGSI_EXTSWIZZLE_W:
+      reg = get_reg(c,
+                   src->SrcRegister.File,
+                   src->SrcRegister.Index,
+                   component );
+
+      /* The negates collected above apply before the absolute value,
+       * so taking the absolute value makes them irrelevant (the ONE
+       * case below relies on the same rule).  Never set both
+       * modifiers on the operand: a negate bit left set next to the
+       * abs bit would be applied by the hardware after the absolute
+       * value.
+       */
+      if (abs)
+        reg = brw_abs(reg);
+      else if (neg)
+        reg = negate(reg);
+
+      break;
+
+      /* XXX: this won't really work in the general case, but we know
+       * that the extended swizzle is only allowed in the SWZ
+       * instruction (right??), in which case using an immediate
+       * directly will work.
+       */
+   case TGSI_EXTSWIZZLE_ZERO:
+      reg = brw_imm_f(0);
+      break;
+
+   case TGSI_EXTSWIZZLE_ONE:
+      if (neg && !abs)
+        reg = brw_imm_f(-1.0);
+      else
+        reg = brw_imm_f(1.0);
+      break;
+
+   default:
+      assert(0);
+      /* Don't hand back an uninitialised register in release builds: */
+      reg = brw_null_reg();
+      break;
+   }
+
+
+   return reg;
 }
 
-static void emit_cinterp(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
+/* ABS: for every channel enabled in the destination writemask, move
+ * the absolute value of the first source into the destination,
+ * honouring the instruction's saturate setting.
+ */
+static void emit_abs( struct brw_wm_compile *c,
+                     struct tgsi_full_instruction *inst)
 {
-    struct brw_compile *p = &c->func;
-    unsigned mask = inst->DstReg.WriteMask;
-
-    struct brw_reg interp[4];
-    struct brw_reg dst, src0;
-
-    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
-    unsigned nr = src0.nr;
-    int i;
-
-    interp[0] = brw_vec1_grf(nr, 0);
-    interp[1] = brw_vec1_grf(nr, 4);
-    interp[2] = brw_vec1_grf(nr+1, 0);
-    interp[3] = brw_vec1_grf(nr+1, 4);
-
-    for(i = 0; i < 4; i++ ) {
-       if (mask & (1<<i)) {
-           dst = get_dst_reg(c, inst, i, 1);
-           brw_MOV(p, dst, suboffset(interp[i],3));
-       }
-    }
+   struct brw_compile *p = &c->func;
+   const unsigned writemask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+   int chan;
+
+   brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE);
+
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_reg dst, src;
+
+      if (!(writemask & (1 << chan)))
+         continue;
+
+      dst = get_dst_reg(c, inst, chan);
+      src = get_src_reg(c, &inst->FullSrcRegisters[0], chan);
+
+      brw_MOV(p, dst, brw_abs(src)); /* NOTE */
+   }
+
+   brw_set_saturate(p, 0);
 }
 
-static void emit_pinterp(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
-{
-    struct brw_compile *p = &c->func;
-    unsigned mask = inst->DstReg.WriteMask;
-
-    struct brw_reg interp[4];
-    struct brw_reg dst, delta0, delta1;
-    struct brw_reg src0, w;
-
-    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
-    delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
-    delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1);
-    w = get_src_reg(c, &inst->SrcReg[2], 3, 1);
-    unsigned nr = src0.nr;
-    int i;
-
-    interp[0] = brw_vec1_grf(nr, 0);
-    interp[1] = brw_vec1_grf(nr, 4);
-    interp[2] = brw_vec1_grf(nr+1, 0);
-    interp[3] = brw_vec1_grf(nr+1, 4);
-
-    for(i = 0; i < 4; i++ ) {
-       if (mask & (1<<i)) {
-           dst = get_dst_reg(c, inst, i, 1);
-           brw_LINE(p, brw_null_reg(), interp[i], delta0);
-           brw_MAC(p, dst, suboffset(interp[i],1),
-                   delta1);
-           brw_MUL(p, dst, dst, w);
-       }
-    }
-}
 
+/* XPD (cross product): for every channel i enabled in the destination
+ * writemask emit
+ *
+ *    MUL null, -src0[(i+2)%3], src1[(i+1)%3]
+ *    MAC dst[i], src0[(i+1)%3], src1[(i+2)%3]
+ *
+ * with the instruction's saturate setting applied to the MAC only.
+ */
 static void emit_xpd(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
+                    struct tgsi_full_instruction *inst)
 {
-    int i;
-    struct brw_compile *p = &c->func;
-    unsigned mask = inst->DstReg.WriteMask;
-    for (i = 0; i < 4; i++) {
-       unsigned i2 = (i+2)%3;
-       unsigned i1 = (i+1)%3;
-       if (mask & (1<<i)) {
-           struct brw_reg src0, src1, dst;
-           dst = get_dst_reg(c, inst, i, 1);
-           src0 = negate(get_src_reg(c, &inst->SrcReg[0], i2, 1));
-           src1 = get_src_reg(c, &inst->SrcReg[1], i1, 1);
-           brw_MUL(p, brw_null_reg(), src0, src1);
-           src0 = get_src_reg(c, &inst->SrcReg[0], i1, 1);
-           src1 = get_src_reg(c, &inst->SrcReg[1], i2, 1);
-           brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
-           brw_MAC(p, dst, src0, src1);
-           brw_set_saturate(p, 0);
-       }
-    }
-    brw_set_saturate(p, 0);
+   struct brw_compile *p = &c->func;
+   struct tgsi_full_src_register *a = &inst->FullSrcRegisters[0];
+   struct tgsi_full_src_register *b = &inst->FullSrcRegisters[1];
+   const unsigned writemask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+   int chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      const unsigned next = (chan+1)%3;
+      const unsigned prev = (chan+2)%3;
+      struct brw_reg dst;
+
+      if (!(writemask & (1 << chan)))
+         continue;
+
+      dst = get_dst_reg(c, inst, chan);
+
+      /* First product goes to the null register: */
+      brw_MUL(p, brw_null_reg(),
+              negate(get_src_reg(c, a, prev)),
+              get_src_reg(c, b, next));
+
+      /* Second product completes the channel into dst: */
+      brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE);
+      brw_MAC(p, dst,
+              get_src_reg(c, a, next),
+              get_src_reg(c, b, prev));
+      brw_set_saturate(p, 0);
+   }
+
+   brw_set_saturate(p, 0);
 }
 
+/* DP3: three-component dot product of the two sources, emitted as
+ * MUL, MAC, MAC.  Only the final MAC writes the destination (the
+ * channel picked by get_scalar_dst_index()) and only that instruction
+ * carries the saturate setting.
+ */
 static void emit_dp3(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
+                    struct tgsi_full_instruction *inst)
 {
-    struct brw_reg src0[3], src1[3], dst;
-    int i;
-    struct brw_compile *p = &c->func;
-    for (i = 0; i < 3; i++) {
-       src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
-       src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
-    }
-
-    dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
-    brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
-    brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
-    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
-    brw_MAC(p, dst, src0[2], src1[2]);
-    brw_set_saturate(p, 0);
+   struct brw_compile *p = &c->func;
+   const struct brw_reg dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+   const int last = 2;
+   int chan;
+
+   for (chan = 0; chan <= last; chan++) {
+      const struct brw_reg a = get_src_reg(c, &inst->FullSrcRegisters[0], chan);
+      const struct brw_reg b = get_src_reg(c, &inst->FullSrcRegisters[1], chan);
+
+      if (chan == 0) {
+         brw_MUL(p, brw_null_reg(), a, b);
+      }
+      else if (chan < last) {
+         brw_MAC(p, brw_null_reg(), a, b);
+      }
+      else {
+         brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+         brw_MAC(p, dst, a, b);
+         brw_set_saturate(p, 0);
+      }
+   }
 }
 
+/* DP4: four-component dot product of the two sources, emitted as
+ * MUL, MAC, MAC, MAC.  Only the final MAC writes the destination (the
+ * channel picked by get_scalar_dst_index()) and only that instruction
+ * carries the saturate setting.
+ */
 static void emit_dp4(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
+                    struct tgsi_full_instruction *inst)
 {
-    struct brw_reg src0[4], src1[4], dst;
-    int i;
-    struct brw_compile *p = &c->func;
-    for (i = 0; i < 4; i++) {
-       src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
-       src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
-    }
-    dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
-    brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
-    brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
-    brw_MAC(p, brw_null_reg(), src0[2], src1[2]);
-    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
-    brw_MAC(p, dst, src0[3], src1[3]);
-    brw_set_saturate(p, 0);
+   struct brw_compile *p = &c->func;
+   const struct brw_reg dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+   const int last = 3;
+   int chan;
+
+   for (chan = 0; chan <= last; chan++) {
+      const struct brw_reg a = get_src_reg(c, &inst->FullSrcRegisters[0], chan);
+      const struct brw_reg b = get_src_reg(c, &inst->FullSrcRegisters[1], chan);
+
+      if (chan == 0) {
+         brw_MUL(p, brw_null_reg(), a, b);
+      }
+      else if (chan < last) {
+         brw_MAC(p, brw_null_reg(), a, b);
+      }
+      else {
+         brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+         brw_MAC(p, dst, a, b);
+         brw_set_saturate(p, 0);
+      }
+   }
 }
 
 static void emit_dph(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
+                    struct tgsi_full_instruction *inst)
 {
-    struct brw_reg src0[4], src1[4], dst;
-    int i;
-    struct brw_compile *p = &c->func;
-    for (i = 0; i < 4; i++) {
-       src0[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
-       src1[i] = get_src_reg(c, &inst->SrcReg[1], i, 1);
-    }
-    dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
-    brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
-    brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
-    brw_MAC(p, dst, src0[2], src1[2]);
-    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
-    brw_ADD(p, dst, src0[3], src1[3]);
-    brw_set_saturate(p, 0);
+   struct brw_reg src0[4], src1[4], dst;
+   int i;
+   struct brw_compile *p = &c->func;
+   for (i = 0; i < 4; i++) {
+      src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+      src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+   }
+   dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+   brw_MUL(p, brw_null_reg(), src0[0], src1[0]);
+   brw_MAC(p, brw_null_reg(), src0[1], src1[1]);
+   brw_MAC(p, dst, src0[2], src1[2]);
+   brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+   brw_ADD(p, dst, src0[3], src1[3]);
+   brw_set_saturate(p, 0);
 }
 
 static void emit_math1(struct brw_wm_compile *c,
-               struct prog_instruction *inst, unsigned func)
+                      struct tgsi_full_instruction *inst, unsigned func)
 {
-    struct brw_compile *p = &c->func;
-    struct brw_reg src0, dst;
+   struct brw_compile *p = &c->func;
+   struct brw_reg src0, dst;
 
-    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
-    dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
-    brw_MOV(p, brw_message_reg(2), src0);
-    brw_math(p,
+   src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0);
+   dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+   brw_MOV(p, brw_message_reg(2), src0);
+   brw_math(p,
            dst,
            func,
-           (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+           ((inst->Instruction.Saturate != TGSI_SAT_NONE) 
+            ? BRW_MATH_SATURATE_SATURATE 
+            : BRW_MATH_SATURATE_NONE),
            2,
            brw_null_reg(),
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);
 }
 
-static void emit_rcp(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
-{
-    emit_math1(c, inst, BRW_MATH_FUNCTION_INV);
-}
 
-static void emit_rsq(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
+static void emit_alu2(struct brw_wm_compile *c,                      
+                     struct tgsi_full_instruction *inst,
+                     unsigned opcode)
 {
-    emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ);
+   struct brw_compile *p = &c->func;
+   struct brw_reg src0, src1, dst;
+   unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+   int i;
+   brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+   for (i = 0 ; i < 4; i++) {
+      if (mask & (1<<i)) {
+        dst = get_dst_reg(c, inst, i);
+        src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+        src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+        brw_alu2(p, opcode, dst, src0, src1);
+      }
+   }
+   brw_set_saturate(p, 0);
 }
 
-static void emit_sin(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
-{
-    emit_math1(c, inst, BRW_MATH_FUNCTION_SIN);
-}
 
-static void emit_cos(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
+static void emit_alu1(struct brw_wm_compile *c,
+                     struct tgsi_full_instruction *inst,
+                     unsigned opcode)
 {
-    emit_math1(c, inst, BRW_MATH_FUNCTION_COS);
+   struct brw_compile *p = &c->func;
+   struct brw_reg src0, dst;
+   unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+   int i;
+   brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+   for (i = 0 ; i < 4; i++) {
+      if (mask & (1<<i)) {   /* only channels enabled in the write mask */
+        dst = get_dst_reg(c, inst, i);
+        src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+        brw_alu1(p, opcode, dst, src0);
+      }
+   }
+   /* Always clear saturate on exit, as emit_alu2 does. */
+   brw_set_saturate(p, 0);
 }
 
-static void emit_ex2(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
-{
-    emit_math1(c, inst, BRW_MATH_FUNCTION_EXP);
-}
-
-static void emit_lg2(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
-{
-    emit_math1(c, inst, BRW_MATH_FUNCTION_LOG);
-}
-
-static void emit_add(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
-{
-    struct brw_compile *p = &c->func;
-    struct brw_reg src0, src1, dst;
-    unsigned mask = inst->DstReg.WriteMask;
-    int i;
-    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
-    for (i = 0 ; i < 4; i++) {
-       if (mask & (1<<i)) {
-           dst = get_dst_reg(c, inst, i, 1);
-           src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
-           src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
-           brw_ADD(p, dst, src0, src1);
-       }
-    }
-    brw_set_saturate(p, 0);
-}
-
-static void emit_sub(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
-{
-    struct brw_compile *p = &c->func;
-    struct brw_reg src0, src1, dst;
-    unsigned mask = inst->DstReg.WriteMask;
-    int i;
-    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
-    for (i = 0 ; i < 4; i++) {
-       if (mask & (1<<i)) {
-           dst = get_dst_reg(c, inst, i, 1);
-           src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
-           src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
-           brw_ADD(p, dst, src0, negate(src1));
-       }
-    }
-    brw_set_saturate(p, 0);
-}
-
-static void emit_mul(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
-{
-    struct brw_compile *p = &c->func;
-    struct brw_reg src0, src1, dst;
-    unsigned mask = inst->DstReg.WriteMask;
-    int i;
-    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
-    for (i = 0 ; i < 4; i++) {
-       if (mask & (1<<i)) {
-           dst = get_dst_reg(c, inst, i, 1);
-           src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
-           src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
-           brw_MUL(p, dst, src0, src1);
-       }
-    }
-    brw_set_saturate(p, 0);
-}
-
-static void emit_frc(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
-{
-    struct brw_compile *p = &c->func;
-    struct brw_reg src0, dst;
-    unsigned mask = inst->DstReg.WriteMask;
-    int i;
-    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
-    for (i = 0 ; i < 4; i++) {
-       if (mask & (1<<i)) {
-           dst = get_dst_reg(c, inst, i, 1);
-           src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
-           brw_FRC(p, dst, src0);
-       }
-    }
-    if (inst->SaturateMode != SATURATE_OFF)
-       brw_set_saturate(p, 0);
-}
-
-static void emit_flr(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
-{
-    struct brw_compile *p = &c->func;
-    struct brw_reg src0, dst;
-    unsigned mask = inst->DstReg.WriteMask;
-    int i;
-    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
-    for (i = 0 ; i < 4; i++) {
-       if (mask & (1<<i)) {
-           dst = get_dst_reg(c, inst, i, 1);
-           src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
-           brw_RNDD(p, dst, src0);
-       }
-    }
-    brw_set_saturate(p, 0);
-}
 
 static void emit_max(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
+                    struct tgsi_full_instruction *inst)
 {
-    struct brw_compile *p = &c->func;
-    unsigned mask = inst->DstReg.WriteMask;
-    struct brw_reg src0, src1, dst;
-    int i;
-    brw_push_insn_state(p);
-    for (i = 0; i < 4; i++) {
-       if (mask & (1<<i)) {
-           dst = get_dst_reg(c, inst, i, 1);
-           src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
-           src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
-           brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
-           brw_MOV(p, dst, src0);
-           brw_set_saturate(p, 0);
-
-           brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1);
-           brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
-           brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
-           brw_MOV(p, dst, src1);
-           brw_set_saturate(p, 0);
-           brw_set_predicate_control_flag_value(p, 0xff);
-       }
-    }
-    brw_pop_insn_state(p);
+   struct brw_compile *p = &c->func;
+   unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+   struct brw_reg src0, src1, dst;
+   int i;
+   brw_push_insn_state(p);
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {
+        dst = get_dst_reg(c, inst, i);
+        src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+        src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+        brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+        brw_MOV(p, dst, src0);
+        brw_set_saturate(p, 0);
+
+        brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1);
+        brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+        brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+        brw_MOV(p, dst, src1);
+        brw_set_saturate(p, 0);
+        brw_set_predicate_control_flag_value(p, 0xff);
+      }
+   }
+   brw_pop_insn_state(p);
 }
 
 static void emit_min(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
+                    struct tgsi_full_instruction *inst)
 {
-    struct brw_compile *p = &c->func;
-    unsigned mask = inst->DstReg.WriteMask;
-    struct brw_reg src0, src1, dst;
-    int i;
-    brw_push_insn_state(p);
-    for (i = 0; i < 4; i++) {
-       if (mask & (1<<i)) {
-           dst = get_dst_reg(c, inst, i, 1);
-           src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
-           src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
-           brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
-           brw_MOV(p, dst, src0);
-           brw_set_saturate(p, 0);
-
-           brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
-           brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
-           brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
-           brw_MOV(p, dst, src1);
-           brw_set_saturate(p, 0);
-           brw_set_predicate_control_flag_value(p, 0xff);
-       }
-    }
-    brw_pop_insn_state(p);
+   struct brw_compile *p = &c->func;
+   unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+   struct brw_reg src0, src1, dst;
+   int i;
+   brw_push_insn_state(p);
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {
+        dst = get_dst_reg(c, inst, i);
+        src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+        src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+        brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+        brw_MOV(p, dst, src0);
+        brw_set_saturate(p, 0);
+
+        brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
+        brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+        brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+        brw_MOV(p, dst, src1);
+        brw_set_saturate(p, 0);
+        brw_set_predicate_control_flag_value(p, 0xff);
+      }
+   }
+   brw_pop_insn_state(p);
 }
 
 static void emit_pow(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
+                    struct tgsi_full_instruction *inst)
 {
-    struct brw_compile *p = &c->func;
-    struct brw_reg dst, src0, src1;
-    dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1);
-    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
-    src1 = get_src_reg(c, &inst->SrcReg[1], 0, 1);
+   struct brw_compile *p = &c->func;
+   struct brw_reg dst, src0, src1;
+   dst = get_dst_reg(c, inst, get_scalar_dst_index(inst));
+   src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0);
+   src1 = get_src_reg(c, &inst->FullSrcRegisters[1], 0);
 
-    brw_MOV(p, brw_message_reg(2), src0);
-    brw_MOV(p, brw_message_reg(3), src1);
+   brw_MOV(p, brw_message_reg(2), src0);
+   brw_MOV(p, brw_message_reg(3), src1);
 
-    brw_math(p,
+   brw_math(p,
            dst,
            BRW_MATH_FUNCTION_POW,
-           (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+           (inst->Instruction.Saturate != TGSI_SAT_NONE 
+            ? BRW_MATH_SATURATE_SATURATE 
+            : BRW_MATH_SATURATE_NONE),
            2,
            brw_null_reg(),
            BRW_MATH_DATA_VECTOR,
@@ -756,601 +440,636 @@ static void emit_pow(struct brw_wm_compile *c,
 }
 
 static void emit_lrp(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
+                    struct tgsi_full_instruction *inst)
 {
-    struct brw_compile *p = &c->func;
-    unsigned mask = inst->DstReg.WriteMask;
-    struct brw_reg dst, tmp1, tmp2, src0, src1, src2;
-    int i;
-    for (i = 0; i < 4; i++) {
-       if (mask & (1<<i)) {
-           dst = get_dst_reg(c, inst, i, 1);
-           src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
-
-           src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
-
-           if (src1.nr == dst.nr) {
-               tmp1 = alloc_tmp(c);
-               brw_MOV(p, tmp1, src1);
-           } else
-               tmp1 = src1;
-
-           src2 = get_src_reg(c, &inst->SrcReg[2], i, 1);
-           if (src2.nr == dst.nr) {
-               tmp2 = alloc_tmp(c);
-               brw_MOV(p, tmp2, src2);
-           } else
-               tmp2 = src2;
-
-           brw_ADD(p, dst, negate(src0), brw_imm_f(1.0));
-           brw_MUL(p, brw_null_reg(), dst, tmp2);
-           brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
-           brw_MAC(p, dst, src0, tmp1);
-           brw_set_saturate(p, 0);
-       }
-       release_tmps(c);
-    }
+   struct brw_compile *p = &c->func;
+   unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+   struct brw_reg dst, tmp1, tmp2, src0, src1, src2;
+   int i;
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {
+        dst = get_dst_reg(c, inst, i);
+        src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+
+        src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+
+        if (src1.nr == dst.nr) {
+           tmp1 = alloc_tmp(c);
+           brw_MOV(p, tmp1, src1);
+        } else
+           tmp1 = src1;
+
+        src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i);
+        if (src2.nr == dst.nr) {
+           tmp2 = alloc_tmp(c);
+           brw_MOV(p, tmp2, src2);
+        } else
+           tmp2 = src2;
+
+        brw_ADD(p, dst, negate(src0), brw_imm_f(1.0));
+        brw_MUL(p, brw_null_reg(), dst, tmp2);
+        brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+        brw_MAC(p, dst, src0, tmp1);
+        brw_set_saturate(p, 0);
+      }
+      release_tmps(c);
+   }
 }
 
 static void emit_kil(struct brw_wm_compile *c)
 {
-       struct brw_compile *p = &c->func;
-       struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
-       brw_push_insn_state(p);
-       brw_set_mask_control(p, BRW_MASK_DISABLE);
-       brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
-       brw_AND(p, depth, c->emit_mask_reg, depth);
-       brw_pop_insn_state(p);
+   struct brw_compile *p = &c->func;
+   struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+   brw_push_insn_state(p);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+   brw_AND(p, depth, c->emit_mask_reg, depth);
+   brw_pop_insn_state(p);
 }
 
 static void emit_mad(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
+                    struct tgsi_full_instruction *inst)
 {
-    struct brw_compile *p = &c->func;
-    unsigned mask = inst->DstReg.WriteMask;
-    struct brw_reg dst, src0, src1, src2;
-    int i;
-
-    for (i = 0; i < 4; i++) {
-       if (mask & (1<<i)) {
-           dst = get_dst_reg(c, inst, i, 1);
-           src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
-           src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
-           src2 = get_src_reg(c, &inst->SrcReg[2], i, 1);
-           brw_MUL(p, dst, src0, src1);
-
-           brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
-           brw_ADD(p, dst, dst, src2);
-           brw_set_saturate(p, 0);
-       }
-    }
+   struct brw_compile *p = &c->func;
+   unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+   struct brw_reg dst, src0, src1, src2;
+   int i;
+
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {
+        dst = get_dst_reg(c, inst, i);
+        src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+        src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+        src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i);
+        brw_MUL(p, dst, src0, src1);
+
+        brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0);
+        brw_ADD(p, dst, dst, src2);
+        brw_set_saturate(p, 0);
+      }
+   }
 }
 
 static void emit_sop(struct brw_wm_compile *c,
-               struct prog_instruction *inst, unsigned cond)
+                    struct tgsi_full_instruction *inst, unsigned cond)
 {
-    struct brw_compile *p = &c->func;
-    unsigned mask = inst->DstReg.WriteMask;
-    struct brw_reg dst, src0, src1;
-    int i;
-
-    brw_push_insn_state(p);
-    for (i = 0; i < 4; i++) {
-       if (mask & (1<<i)) {
-           dst = get_dst_reg(c, inst, i, 1);
-           src0 = get_src_reg(c, &inst->SrcReg[0], i, 1);
-           src1 = get_src_reg(c, &inst->SrcReg[1], i, 1);
-           brw_CMP(p, brw_null_reg(), cond, src0, src1);
-           brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-           brw_MOV(p, dst, brw_imm_f(0.0));
-           brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
-           brw_MOV(p, dst, brw_imm_f(1.0));
-       }
-    }
-    brw_pop_insn_state(p);
+   struct brw_compile *p = &c->func;
+   unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+   struct brw_reg dst, src0, src1;
+   int i;
+
+   brw_push_insn_state(p);
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {
+        dst = get_dst_reg(c, inst, i);
+        src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i);
+        src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i);
+        brw_CMP(p, brw_null_reg(), cond, src0, src1);
+        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+        brw_MOV(p, dst, brw_imm_f(0.0));
+        brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+        brw_MOV(p, dst, brw_imm_f(1.0));
+      }
+   }
+   brw_pop_insn_state(p);
 }
 
-static void emit_slt(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
-{
-    emit_sop(c, inst, BRW_CONDITIONAL_L);
-}
 
-static void emit_sle(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
+static void emit_ddx(struct brw_wm_compile *c,
+                    struct tgsi_full_instruction *inst)
 {
-    emit_sop(c, inst, BRW_CONDITIONAL_LE);
+   struct brw_compile *p = &c->func;
+   unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+   struct brw_reg interp[4];
+   struct brw_reg dst;
+   struct brw_reg src0, w;
+   unsigned nr, i;
+   src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0);
+   w = get_src_reg(c, &inst->FullSrcRegisters[1], 3);
+   nr = src0.nr;
+   interp[0] = brw_vec1_grf(nr, 0);
+   interp[1] = brw_vec1_grf(nr, 4);
+   interp[2] = brw_vec1_grf(nr+1, 0);
+   interp[3] = brw_vec1_grf(nr+1, 4);
+   for(i = 0; i < 4; i++ ) {
+      if (mask & (1<<i)) {
+        dst = get_dst_reg(c, inst, i);
+        brw_MOV(p, dst, interp[i]);   /* unsaturated copy; clamping here would corrupt the product */
+        brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE);
+        brw_MUL(p, dst, dst, w);   /* saturate only the final result, as emit_mad does */
+        brw_set_saturate(p, 0);
+      }
+   }
 }
 
-static void emit_sgt(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
+static void emit_ddy(struct brw_wm_compile *c,
+                    struct tgsi_full_instruction *inst)
 {
-    emit_sop(c, inst, BRW_CONDITIONAL_G);
+   struct brw_compile *p = &c->func;
+   unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+   struct brw_reg interp[4];
+   struct brw_reg dst;
+   struct brw_reg src0, w;
+   unsigned nr, i;
+
+   src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0);
+   nr = src0.nr;
+   w = get_src_reg(c, &inst->FullSrcRegisters[1], 3);
+   interp[0] = brw_vec1_grf(nr, 0);
+   interp[1] = brw_vec1_grf(nr, 4);
+   interp[2] = brw_vec1_grf(nr+1, 0);
+   interp[3] = brw_vec1_grf(nr+1, 4);
+   for(i = 0; i < 4; i++ ) {
+      if (mask & (1<<i)) {
+        dst = get_dst_reg(c, inst, i);
+        brw_MOV(p, dst, suboffset(interp[i], 1));   /* unsaturated copy; clamping here would corrupt the product */
+        brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE);
+        brw_MUL(p, dst, dst, w);   /* saturate only the final result, as emit_mad does */
+        brw_set_saturate(p, 0);
+      }
+   }
 }
 
-static void emit_sge(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
+/* TODO
+   BIAS on SIMD8 not working yet...
+*/
+static void emit_txb(struct brw_wm_compile *c,
+                    struct tgsi_full_instruction *inst)
 {
-    emit_sop(c, inst, BRW_CONDITIONAL_GE);
-}
+#if 0
+   struct brw_compile *p = &c->func;
+   struct brw_reg payload_reg = c->payload_depth[0];
+   struct brw_reg dst[4], src[4];
+   unsigned i;
+   for (i = 0; i < 4; i++)
+      dst[i] = get_dst_reg(c, inst, i);
+   for (i = 0; i < 4; i++)
+      src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i);
 
-static void emit_seq(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
-{
-    emit_sop(c, inst, BRW_CONDITIONAL_EQ);
-}
+#if 0
+   switch (inst->TexSrcTarget) {
+   case TEXTURE_1D_INDEX:
+      brw_MOV(p, brw_message_reg(2), src[0]);
+      brw_MOV(p, brw_message_reg(3), brw_imm_f(0));
+      brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+      break;
+   case TEXTURE_2D_INDEX:
+   case TEXTURE_RECT_INDEX:
+      brw_MOV(p, brw_message_reg(2), src[0]);
+      brw_MOV(p, brw_message_reg(3), src[1]);
+      brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+      break;
+   default:
+      brw_MOV(p, brw_message_reg(2), src[0]);
+      brw_MOV(p, brw_message_reg(3), src[1]);
+      brw_MOV(p, brw_message_reg(4), src[2]);
+      break;
+   }
+#else
+   brw_MOV(p, brw_message_reg(2), src[0]);
+   brw_MOV(p, brw_message_reg(3), src[1]);
+   brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+#endif
 
-static void emit_sne(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
-{
-    emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
+   brw_MOV(p, brw_message_reg(5), src[3]);
+   brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+   brw_SAMPLE(p,
+             retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
+             1,
+             retype(payload_reg, BRW_REGISTER_TYPE_UW),
+             inst->TexSrcUnit + 1, /* surface */
+             inst->TexSrcUnit,     /* sampler */
+             inst->FullDstRegisters[0].DstRegister.WriteMask,
+             BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
+             4,
+             4,
+             0);
+#endif
 }
 
-static void emit_ddx(struct brw_wm_compile *c,
-                struct prog_instruction *inst)
+static void emit_tex(struct brw_wm_compile *c,
+                    struct tgsi_full_instruction *inst)
 {
-    struct brw_compile *p = &c->func;
-    unsigned mask = inst->DstReg.WriteMask;
-    struct brw_reg interp[4];
-    struct brw_reg dst;
-    struct brw_reg src0, w;
-    unsigned nr, i;
-    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
-    w = get_src_reg(c, &inst->SrcReg[1], 3, 1);
-    nr = src0.nr;
-    interp[0] = brw_vec1_grf(nr, 0);
-    interp[1] = brw_vec1_grf(nr, 4);
-    interp[2] = brw_vec1_grf(nr+1, 0);
-    interp[3] = brw_vec1_grf(nr+1, 4);
-    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
-    for(i = 0; i < 4; i++ ) {
-        if (mask & (1<<i)) {
-            dst = get_dst_reg(c, inst, i, 1);
-            brw_MOV(p, dst, interp[i]);
-            brw_MUL(p, dst, dst, w);
-        }
-    }
-    brw_set_saturate(p, 0);
-}
+#if 0
+   struct brw_compile *p = &c->func;
+   struct brw_reg payload_reg = c->payload_depth[0];
+   struct brw_reg dst[4], src[4];
+   unsigned msg_len;
+   unsigned i, nr;
+   unsigned emit;
+   boolean shadow = (c->key.shadowtex_mask & (1<<inst->TexSrcUnit)) ? 1 : 0;
+
+   for (i = 0; i < 4; i++)
+      dst[i] = get_dst_reg(c, inst, i);
+   for (i = 0; i < 4; i++)
+      src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i);
 
-static void emit_ddy(struct brw_wm_compile *c,
-                struct prog_instruction *inst)
-{
-    struct brw_compile *p = &c->func;
-    unsigned mask = inst->DstReg.WriteMask;
-    struct brw_reg interp[4];
-    struct brw_reg dst;
-    struct brw_reg src0, w;
-    unsigned nr, i;
-
-    src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1);
-    nr = src0.nr;
-    w = get_src_reg(c, &inst->SrcReg[1], 3, 1);
-    interp[0] = brw_vec1_grf(nr, 0);
-    interp[1] = brw_vec1_grf(nr, 4);
-    interp[2] = brw_vec1_grf(nr+1, 0);
-    interp[3] = brw_vec1_grf(nr+1, 4);
-    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
-    for(i = 0; i < 4; i++ ) {
-        if (mask & (1<<i)) {
-            dst = get_dst_reg(c, inst, i, 1);
-            brw_MOV(p, dst, suboffset(interp[i], 1));
-            brw_MUL(p, dst, dst, w);
-        }
-    }
-    brw_set_saturate(p, 0);
-}
+#if 0
+   switch (inst->TexSrcTarget) {
+   case TEXTURE_1D_INDEX:
+      emit = WRITEMASK_X;
+      nr = 1;
+      break;
+   case TEXTURE_2D_INDEX:
+   case TEXTURE_RECT_INDEX:
+      emit = WRITEMASK_XY;
+      nr = 2;
+      break;
+   default:
+      emit = WRITEMASK_XYZ;
+      nr = 3;
+      break;
+   }
+#else
+   emit = WRITEMASK_XY;
+   nr = 2;
+#endif
 
-static void emit_wpos_xy(struct brw_wm_compile *c,
-                struct prog_instruction *inst)
-{
-    struct brw_compile *p = &c->func;
-    unsigned mask = inst->DstReg.WriteMask;
-    struct brw_reg src0[2], dst[2];
-
-    dst[0] = get_dst_reg(c, inst, 0, 1);
-    dst[1] = get_dst_reg(c, inst, 1, 1);
-
-    src0[0] = get_src_reg(c, &inst->SrcReg[0], 0, 1);
-    src0[1] = get_src_reg(c, &inst->SrcReg[0], 1, 1);
-
-    /* Calc delta X,Y by subtracting origin in r1 from the pixel
-     * centers.
-     */
-    if (mask & WRITEMASK_X) {
-       brw_MOV(p,
-               dst[0],
-               retype(src0[0], BRW_REGISTER_TYPE_UW));
-    }
-
-    if (mask & WRITEMASK_Y) {
-       /* TODO -- window_height - Y */
-       brw_MOV(p,
-               dst[1],
-               retype(src0[1], BRW_REGISTER_TYPE_UW));
-
-    }
+   msg_len = 1;
+
+   for (i = 0; i < nr; i++) {
+      static const unsigned swz[4] = {0,1,2,2};
+      if (emit & (1<<i))
+        brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]);
+      else
+        brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0));
+      msg_len += 1;
+   }
+
+   if (shadow) {
+      brw_MOV(p, brw_message_reg(5), brw_imm_f(0));
+      brw_MOV(p, brw_message_reg(6), src[2]);
+   }
+
+   brw_SAMPLE(p,
+             retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
+             1,
+             retype(payload_reg, BRW_REGISTER_TYPE_UW),
+             inst->TexSrcUnit + 1, /* surface */
+             inst->TexSrcUnit,     /* sampler */
+             inst->FullDstRegisters[0].DstRegister.WriteMask,
+             BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE,
+             4,
+             shadow ? 6 : 4,
+             0);
+
+   if (shadow)
+      brw_MOV(p, dst[3], brw_imm_f(1.0));
+#endif
 }
 
-/* TODO
-   BIAS on SIMD8 not workind yet...
- */
-static void emit_txb(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
-{
-    struct brw_compile *p = &c->func;
-    struct brw_reg dst[4], src[4], payload_reg;
-    unsigned i;
-    payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
-    for (i = 0; i < 4; i++)
-       dst[i] = get_dst_reg(c, inst, i, 1);
-    for (i = 0; i < 4; i++)
-       src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
-
-    switch (inst->TexSrcTarget) {
-       case TEXTURE_1D_INDEX:
-           brw_MOV(p, brw_message_reg(2), src[0]);
-           brw_MOV(p, brw_message_reg(3), brw_imm_f(0));
-           brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
-           break;
-       case TEXTURE_2D_INDEX:
-       case TEXTURE_RECT_INDEX:
-           brw_MOV(p, brw_message_reg(2), src[0]);
-           brw_MOV(p, brw_message_reg(3), src[1]);
-           brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
-           break;
-       default:
-           brw_MOV(p, brw_message_reg(2), src[0]);
-           brw_MOV(p, brw_message_reg(3), src[1]);
-           brw_MOV(p, brw_message_reg(4), src[2]);
-           break;
-    }
-    brw_MOV(p, brw_message_reg(5), src[3]);
-    brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
-    brw_SAMPLE(p,
-           retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
-           1,
-           retype(payload_reg, BRW_REGISTER_TYPE_UW),
-           inst->TexSrcUnit + 1, /* surface */
-           inst->TexSrcUnit,     /* sampler */
-           inst->DstReg.WriteMask,
-           BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS,
-           4,
-           4,
-           0);
-}
 
-static void emit_tex(struct brw_wm_compile *c,
-               struct prog_instruction *inst)
-{
-    struct brw_compile *p = &c->func;
-    struct brw_reg dst[4], src[4], payload_reg;
-    unsigned msg_len;
-    unsigned i, nr;
-    unsigned emit;
-    boolean shadow = (c->key.shadowtex_mask & (1<<inst->TexSrcUnit)) ? 1 : 0;
 
-    payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
 
-    for (i = 0; i < 4; i++)
-       dst[i] = get_dst_reg(c, inst, i, 1);
-    for (i = 0; i < 4; i++)
-       src[i] = get_src_reg(c, &inst->SrcReg[0], i, 1);
 
 
-    switch (inst->TexSrcTarget) {
-       case TEXTURE_1D_INDEX:
-           emit = WRITEMASK_X;
-           nr = 1;
-           break;
-       case TEXTURE_2D_INDEX:
-       case TEXTURE_RECT_INDEX:
-           emit = WRITEMASK_XY;
-           nr = 2;
-           break;
-       default:
-           emit = WRITEMASK_XYZ;
-           nr = 3;
-           break;
-    }
-    msg_len = 1;
-
-    for (i = 0; i < nr; i++) {
-       static const unsigned swz[4] = {0,1,2,2};
-       if (emit & (1<<i))
-           brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]);
-       else
-           brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0));
-       msg_len += 1;
-    }
-
-    if (shadow) {
-       brw_MOV(p, brw_message_reg(5), brw_imm_f(0));
-       brw_MOV(p, brw_message_reg(6), src[2]);
-    }
-
-    brw_SAMPLE(p,
-           retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),
-           1,
-           retype(payload_reg, BRW_REGISTER_TYPE_UW),
-           inst->TexSrcUnit + 1, /* surface */
-           inst->TexSrcUnit,     /* sampler */
-           inst->DstReg.WriteMask,
-           BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE,
-           4,
-           shadow ? 6 : 4,
-           0);
-
-    if (shadow)
-       brw_MOV(p, dst[3], brw_imm_f(1.0));
-}
 
-static void post_wm_emit( struct brw_wm_compile *c )
+
+static void emit_fb_write(struct brw_wm_compile *c,
+                         struct tgsi_full_instruction *inst)
 {
-    unsigned nr_insns = c->fp->program.Base.NumInstructions;
-    unsigned insn, target_insn;
-    struct prog_instruction *inst1, *inst2;
-    struct brw_instruction *brw_inst1, *brw_inst2;
-    int offset;
-    for (insn = 0; insn < nr_insns; insn++) {
-       inst1 = &c->fp->program.Base.Instructions[insn];
-       brw_inst1 = inst1->Data;
-       switch (inst1->Opcode) {
-           case OPCODE_CAL:
-               target_insn = inst1->BranchTarget;
-               inst2 = &c->fp->program.Base.Instructions[target_insn];
-               brw_inst2 = inst2->Data;
-               offset = brw_inst2 - brw_inst1;
-               brw_set_src1(brw_inst1, brw_imm_d(offset*16));
-               break;
-           default:
-               break;
-       }
-    }
+   struct brw_compile *p = &c->func;
+   int nr = 2;
+   int channel;
+   int base_reg = 0;
+
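+   // NOTE: 'inst' is not read below; color comes from
+   // c->wm_regs[TGSI_FILE_OUTPUT][0].  Operand notes, presumably legacy:
+   // src0 = output color, src1 = payload_depth[0], src2 = output depth,
+   // dst = ???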
+
+
+
+   /* Reserve a space for AA - may not be needed:
+    */
+   if (c->key.aa_dest_stencil_reg)
+      nr += 1;
+
+   {
+      brw_push_insn_state(p);
+      for (channel = 0; channel < 4; channel++) {
+        struct brw_reg src0 = c->wm_regs[TGSI_FILE_OUTPUT][0][channel];
+
+        /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
+        /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
+        brw_MOV(p, brw_message_reg(nr + channel), src0);
+      }
+      /* skip over the regs populated above: */
+      nr += 8;
+      brw_pop_insn_state(p);
+   }
+    
+
+   /* Pass through control information:
+    */
+   /*  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */
+   {
+      brw_push_insn_state(p);
+      brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
+      brw_MOV(p,
+             brw_message_reg(base_reg + 1),
+             brw_vec8_grf(1, 0));
+      brw_pop_insn_state(p);
+   }
+
+   /* Send framebuffer write message: */
+   brw_fb_WRITE(p,
+               retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
+               base_reg,
+               retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+               0,              /* render surface always 0 */
+               nr,
+               0,
+               1);
+
 }
 
-static void brw_wm_emit_glsl(struct brw_wm_compile *c)
 
+static void brw_wm_emit_instruction( struct brw_wm_compile *c,
+                                    struct tgsi_full_instruction *inst )
 {
-#define MAX_IFSN 32
-#define MAX_LOOP_DEPTH 32
-    struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH];
-    struct brw_instruction *inst0, *inst1;
-    int i, if_insn = 0, loop_insn = 0;
-    struct brw_compile *p = &c->func;
-    struct brw_indirect stack_index = brw_indirect(0, 0);
-
-    brw_init_compile(&c->func);
-    c->reg_index = 0;
-    prealloc_reg(c);
-    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
-    brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
-
-    for (i = 0; i < c->nr_fp_insns; i++) {
-       struct prog_instruction *inst = &c->prog_instructions[i];
-       struct prog_instruction *orig_inst;
-
-       if ((orig_inst = inst->Data) != 0)
-           orig_inst->Data = current_insn(p);
-
-       if (inst->CondUpdate)
-           brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
-       else
-           brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
-
-       switch (inst->Opcode) {
-           case WM_PIXELXY:
-               emit_pixel_xy(c, inst);
-               break;
-           case WM_DELTAXY:
-               emit_delta_xy(c, inst);
-               break;
-           case WM_PIXELW:
-               emit_pixel_w(c, inst);
-               break;
-           case WM_LINTERP:
-               emit_linterp(c, inst);
-               break;
-           case WM_PINTERP:
-               emit_pinterp(c, inst);
-               break;
-           case WM_CINTERP:
-               emit_cinterp(c, inst);
-               break;
-           case WM_WPOSXY:
-               emit_wpos_xy(c, inst);
-               break;
-           case WM_FB_WRITE:
-               emit_fb_write(c, inst);
-               break;
-           case OPCODE_ABS:
-               emit_abs(c, inst);
-               break;
-           case OPCODE_ADD:
-               emit_add(c, inst);
-               break;
-           case OPCODE_SUB:
-               emit_sub(c, inst);
-               break;
-           case OPCODE_FRC:
-               emit_frc(c, inst);
-               break;
-           case OPCODE_FLR:
-               emit_flr(c, inst);
-               break;
-           case OPCODE_LRP:
-               emit_lrp(c, inst);
-               break;
-           case OPCODE_INT:
-               emit_int(c, inst);
-               break;
-           case OPCODE_MOV:
-               emit_mov(c, inst);
-               break;
-           case OPCODE_DP3:
-               emit_dp3(c, inst);
-               break;
-           case OPCODE_DP4:
-               emit_dp4(c, inst);
-               break;
-           case OPCODE_XPD:
-               emit_xpd(c, inst);
-               break;
-           case OPCODE_DPH:
-               emit_dph(c, inst);
-               break;
-           case OPCODE_RCP:
-               emit_rcp(c, inst);
-               break;
-           case OPCODE_RSQ:
-               emit_rsq(c, inst);
-               break;
-           case OPCODE_SIN:
-               emit_sin(c, inst);
-               break;
-           case OPCODE_COS:
-               emit_cos(c, inst);
-               break;
-           case OPCODE_EX2:
-               emit_ex2(c, inst);
-               break;
-           case OPCODE_LG2:
-               emit_lg2(c, inst);
-               break;
-           case OPCODE_MAX:
-               emit_max(c, inst);
-               break;
-           case OPCODE_MIN:
-               emit_min(c, inst);
-               break;
-           case OPCODE_DDX:
-               emit_ddx(c, inst);
-               break;
-           case OPCODE_DDY:
-                emit_ddy(c, inst);
-                break;
-           case OPCODE_SLT:
-               emit_slt(c, inst);
-               break;
-           case OPCODE_SLE:
-               emit_sle(c, inst);
-               break;
-           case OPCODE_SGT:
-               emit_sgt(c, inst);
-               break;
-           case OPCODE_SGE:
-               emit_sge(c, inst);
-               break;
-           case OPCODE_SEQ:
-               emit_seq(c, inst);
-               break;
-           case OPCODE_SNE:
-               emit_sne(c, inst);
-               break;
-           case OPCODE_MUL:
-               emit_mul(c, inst);
-               break;
-           case OPCODE_POW:
-               emit_pow(c, inst);
-               break;
-           case OPCODE_MAD:
-               emit_mad(c, inst);
-               break;
-           case OPCODE_TEX:
-               emit_tex(c, inst);
-               break;
-           case OPCODE_TXB:
-               emit_txb(c, inst);
-               break;
-           case OPCODE_KIL_NV:
-               emit_kil(c);
-               break;
-           case OPCODE_IF:
-               assert(if_insn < MAX_IFSN);
-               if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
-               break;
-           case OPCODE_ELSE:
-               if_inst[if_insn-1]  = brw_ELSE(p, if_inst[if_insn-1]);
-               break;
-           case OPCODE_ENDIF:
-               assert(if_insn > 0);
-               brw_ENDIF(p, if_inst[--if_insn]);
-               break;
-           case OPCODE_BGNSUB:
-           case OPCODE_ENDSUB:
-               break;
-           case OPCODE_CAL:
-               brw_push_insn_state(p);
-               brw_set_mask_control(p, BRW_MASK_DISABLE);
-                brw_set_access_mode(p, BRW_ALIGN_1);
-                brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
-                brw_set_access_mode(p, BRW_ALIGN_16);
-                brw_ADD(p, get_addr_reg(stack_index),
-                         get_addr_reg(stack_index), brw_imm_d(4));
-                orig_inst = inst->Data;
-                orig_inst->Data = &p->store[p->nr_insn];
-                brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
-                brw_pop_insn_state(p);
-               break;
-
-           case OPCODE_RET:
-               brw_push_insn_state(p);
-               brw_set_mask_control(p, BRW_MASK_DISABLE);
-                brw_ADD(p, get_addr_reg(stack_index),
-                        get_addr_reg(stack_index), brw_imm_d(-4));
-                brw_set_access_mode(p, BRW_ALIGN_1);
-                brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0));
-                brw_set_access_mode(p, BRW_ALIGN_16);
-               brw_pop_insn_state(p);
-
-               break;
-           case OPCODE_BGNLOOP:
-               loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8);
-               break;
-           case OPCODE_BRK:
-               brw_BREAK(p);
-               brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-               break;
-           case OPCODE_CONT:
-               brw_CONT(p);
-               brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-               break;
-           case OPCODE_ENDLOOP:
-               loop_insn--;
-               inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]);
-               /* patch all the BREAK instructions from
-                  last BEGINLOOP */
-               while (inst0 > loop_inst[loop_insn]) {
-                   inst0--;
-                   if (inst0->header.opcode == BRW_OPCODE_BREAK) {
-                       inst0->bits3.if_else.jump_count = inst1 - inst0 + 1;
-                       inst0->bits3.if_else.pop_count = 0;
-                   } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
-                        inst0->bits3.if_else.jump_count = inst1 - inst0;
-                        inst0->bits3.if_else.pop_count = 0;
-                    }
-               }
-               break;
-           default:
-               _mesa_printf("unsupported IR in fragment shader %d\n",
-                       inst->Opcode);
-       }
-       if (inst->CondUpdate)
-           brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
-       else
-           brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-    }
-    post_wm_emit(c);
-    for (i = 0; i < c->fp->program.Base.NumInstructions; i++)
-       c->fp->program.Base.Instructions[i].Data = NULL;
+   struct brw_compile *p = &c->func;
+
+#if 0   
+   if (inst->CondUpdate)
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+   else
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+#else
+   brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+#endif
+
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_ABS:
+      emit_abs(c, inst);
+      break;
+   case TGSI_OPCODE_ADD:
+      emit_alu2(c, inst, BRW_OPCODE_ADD);
+      break;
+   case TGSI_OPCODE_SUB:
+      assert(0);
+//      emit_alu2(c, inst, BRW_OPCODE_SUB);
+      break;
+   case TGSI_OPCODE_FRC:
+      emit_alu1(c, inst, BRW_OPCODE_FRC);
+      break;
+   case TGSI_OPCODE_FLR:
+      assert(0);
+//      emit_alu1(c, inst, BRW_OPCODE_FLR);
+      break;
+   case TGSI_OPCODE_LRP:
+      emit_lrp(c, inst);
+      break;
+   case TGSI_OPCODE_INT:
+      emit_alu1(c, inst, BRW_OPCODE_RNDD);
+      break;
+   case TGSI_OPCODE_MOV:
+      emit_alu1(c, inst, BRW_OPCODE_MOV);
+      break;
+   case TGSI_OPCODE_DP3:
+      emit_dp3(c, inst);
+      break;
+   case TGSI_OPCODE_DP4:
+      emit_dp4(c, inst);
+      break;
+   case TGSI_OPCODE_XPD:
+      emit_xpd(c, inst);
+      break;
+   case TGSI_OPCODE_DPH:
+      emit_dph(c, inst);
+      break;
+   case TGSI_OPCODE_RCP:
+      emit_math1(c, inst, BRW_MATH_FUNCTION_INV);
+      break;
+   case TGSI_OPCODE_RSQ:
+      emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ);
+      break;
+   case TGSI_OPCODE_SIN:
+      emit_math1(c, inst, BRW_MATH_FUNCTION_SIN);
+      break;
+   case TGSI_OPCODE_COS:
+      emit_math1(c, inst, BRW_MATH_FUNCTION_COS);
+      break;
+   case TGSI_OPCODE_EX2:
+      emit_math1(c, inst, BRW_MATH_FUNCTION_EXP);
+      break;
+   case TGSI_OPCODE_LG2:
+      emit_math1(c, inst, BRW_MATH_FUNCTION_LOG);
+      break;
+   case TGSI_OPCODE_MAX:
+      emit_max(c, inst);
+      break;
+   case TGSI_OPCODE_MIN:
+      emit_min(c, inst);
+      break;
+   case TGSI_OPCODE_DDX:
+      emit_ddx(c, inst);
+      break;
+   case TGSI_OPCODE_DDY:
+      emit_ddy(c, inst);
+      break;
+   case TGSI_OPCODE_SLT:
+      emit_sop(c, inst, BRW_CONDITIONAL_L);
+      break;
+   case TGSI_OPCODE_SLE:
+      emit_sop(c, inst, BRW_CONDITIONAL_LE);
+      break;
+   case TGSI_OPCODE_SGT:
+      emit_sop(c, inst, BRW_CONDITIONAL_G);
+      break;
+   case TGSI_OPCODE_SGE:
+      emit_sop(c, inst, BRW_CONDITIONAL_GE);
+      break;
+   case TGSI_OPCODE_SEQ:
+      emit_sop(c, inst, BRW_CONDITIONAL_EQ);
+      break;
+   case TGSI_OPCODE_SNE:
+      emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
+      break;
+   case TGSI_OPCODE_MUL:
+      emit_alu2(c, inst, BRW_OPCODE_MUL);
+      break;
+   case TGSI_OPCODE_POW:
+      emit_pow(c, inst);
+      break;
+   case TGSI_OPCODE_MAD:
+      emit_mad(c, inst);
+      break;
+   case TGSI_OPCODE_TEX:
+      emit_tex(c, inst);
+      break;
+   case TGSI_OPCODE_TXB:
+      emit_txb(c, inst);
+      break;
+   case TGSI_OPCODE_TEXKILL:
+      emit_kil(c);
+      break;
+   case TGSI_OPCODE_IF:
+      assert(c->if_insn < MAX_IFSN);
+      c->if_inst[c->if_insn++] = brw_IF(p, BRW_EXECUTE_8);
+      break;
+   case TGSI_OPCODE_ELSE:
+      c->if_inst[c->if_insn-1]  = brw_ELSE(p, c->if_inst[c->if_insn-1]);
+      break;
+   case TGSI_OPCODE_ENDIF:
+      assert(c->if_insn > 0);
+      brw_ENDIF(p, c->if_inst[--c->if_insn]);
+      break;
+   case TGSI_OPCODE_BGNSUB:
+   case TGSI_OPCODE_ENDSUB:
+      break;
+   case TGSI_OPCODE_CAL:
+      brw_push_insn_state(p);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+      brw_set_access_mode(p, BRW_ALIGN_1);
+      brw_ADD(p, deref_1ud(c->stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+      brw_set_access_mode(p, BRW_ALIGN_16);
+      brw_ADD(p, 
+             get_addr_reg(c->stack_index),
+             get_addr_reg(c->stack_index), brw_imm_d(4));
+//      orig_inst = inst->Data;
+//      orig_inst->Data = &p->store[p->nr_insn];
+      assert(0);
+      brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+      brw_pop_insn_state(p);
+      break;
+
+   case TGSI_OPCODE_RET:
+      brw_push_insn_state(p);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+      brw_ADD(p, 
+             get_addr_reg(c->stack_index),
+             get_addr_reg(c->stack_index), brw_imm_d(-4));
+      brw_set_access_mode(p, BRW_ALIGN_1);
+      brw_MOV(p, brw_ip_reg(), deref_1ud(c->stack_index, 0));
+      brw_set_access_mode(p, BRW_ALIGN_16);
+      brw_pop_insn_state(p);
+
+      break;
+   case TGSI_OPCODE_LOOP:
+      c->loop_inst[c->loop_insn++] = brw_DO(p, BRW_EXECUTE_8);
+      break;
+   case TGSI_OPCODE_BRK:
+      brw_BREAK(p);
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      break;
+   case TGSI_OPCODE_CONT:
+      brw_CONT(p);
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      break;
+   case TGSI_OPCODE_ENDLOOP:
+      c->loop_insn--;
+      c->inst0 = c->inst1 = brw_WHILE(p, c->loop_inst[c->loop_insn]);
+      /* Patch the jump counts of all BREAK and CONTINUE instructions
+        emitted since the start of this loop. */
+      while (c->inst0 > c->loop_inst[c->loop_insn]) {
+        c->inst0--;
+        if (c->inst0->header.opcode == BRW_OPCODE_BREAK) {
+           c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0 + 1;
+           c->inst0->bits3.if_else.pop_count = 0;
+        } else if (c->inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+           c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0;
+           c->inst0->bits3.if_else.pop_count = 0;
+        }
+      }
+      break;
+   case TGSI_OPCODE_END:
+      emit_fb_write(c, inst);
+      break;
+
+   default:
+      _mesa_printf("unsupported IR in fragment shader %d\n",
+                  inst->Instruction.Opcode);
+   }
+#if 0
+   if (inst->CondUpdate)
+      brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+   else
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+#endif
 }
 
+
+
+
+
+
 void brw_wm_glsl_emit(struct brw_wm_compile *c)
 {
-    brw_wm_pass_fp(c);
-    c->tmp_index = 127;
-    brw_wm_emit_glsl(c);
-    c->prog_data.total_grf = c->reg_index;
-    c->prog_data.total_scratch = 0;
-}
+   struct tgsi_parse_context parse;
+   struct brw_compile *p = &c->func;
+
+   brw_init_compile(&c->func);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+   c->reg_index = 0;
+   c->if_insn = 0;
+   c->loop_insn = 0;
+   c->stack_index = brw_indirect(0,0);
+
+   /* Do static register allocation and parameter interpolation:
+    */
+   brw_wm_emit_decls( c );
+
+   /* Emit the actual program.  For now this is a very direct
+    * translation; hopefully we can improve on this shortly...
+    */
+   brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack));
+
+   tgsi_parse_init( &parse, c->fp->program.tokens );
+
+   while( !tgsi_parse_end_of_tokens( &parse ) ) 
+   {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+        /* already done */
+        break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         /* not handled yet */
+        assert(0);
+         break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         brw_wm_emit_instruction(c, &parse.FullToken.FullInstruction);
+         break;
+
+      default:
+         assert( 0 );
+      }
+   }
+
+   tgsi_parse_free (&parse);
+   
+   /* Fix up call targets (currently compiled out with #if 0):
+    */
+#if 0
+   {
+      unsigned nr_insns = c->fp->program.Base.NumInstructions;
+      unsigned insn, target_insn;
+      struct tgsi_full_instruction *inst1, *inst2;
+      struct brw_instruction *brw_inst1, *brw_inst2;
+      int offset;
+      for (insn = 0; insn < nr_insns; insn++) {
+        inst1 = &c->fp->program.Base.Instructions[insn];
+        brw_inst1 = inst1->Data;
+        switch (inst1->Opcode) {
+        case TGSI_OPCODE_CAL:
+           target_insn = inst1->BranchTarget;
+           inst2 = &c->fp->program.Base.Instructions[target_insn];
+           brw_inst2 = inst2->Data;
+           offset = brw_inst2 - brw_inst1;
+           brw_set_src1(brw_inst1, brw_imm_d(offset*16));
+           break;
+        default:
+           break;
+        }
+      }
+   }
 #endif
+
+   c->prog_data.total_grf = c->reg_index;
+   c->prog_data.total_scratch = 0;
+}
index 4ea0dd7db0f41fc419593d8e556e84a357881b37..fbeea8c8090a49a864386544701ff012b791f5d9 100644 (file)
@@ -229,12 +229,12 @@ static void brw_update_sampler_state( const struct pipe_sampler_state *pipe_samp
  * complicates various things.  However, this is still too confusing -
  * FIXME: simplify all the different new texture state flags.
  */
-void brw_upload_wm_samplers(struct brw_context *brw)
+static void upload_wm_samplers(struct brw_context *brw)
 {
    unsigned unit;
    unsigned sampler_count = 0;
 
-   /* _NEW_TEXTURE */
+   /* BRW_NEW_SAMPLER */
    for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
       if (brw->attribs.Samplers[unit]) { /* FIXME: correctly detect enabled ones */
          const struct pipe_sampler_state *sampler = brw->attribs.Samplers[unit];
@@ -262,14 +262,11 @@ void brw_upload_wm_samplers(struct brw_context *brw)
                           sizeof(struct brw_sampler_state) * brw->wm.sampler_count);
 }
 
-#if 0
 const struct brw_tracked_state brw_wm_samplers = {
    .dirty = {
-      .mesa = _NEW_TEXTURE,
-      .brw = 0,
+      .brw = BRW_NEW_SAMPLER,
       .cache = 0
    },
    .update = upload_wm_samplers
 };
-#endif
 
index b45fb2f56bb24ae91c0523b26b269ebdd391480b..52d2c854237c6557a299c70d356ec3b11c6116b8 100644 (file)
 #include "brw_state.h"
 #include "brw_defines.h"
 #include "brw_wm.h"
+#include "pipe/p_util.h"
 
 /***********************************************************************
  * WM unit - fragment programs and rasterization
  */
-
-#if 0
 static void upload_wm_unit(struct brw_context *brw )
 {
-   struct intel_context *intel = &brw->intel;
    struct brw_wm_unit_state wm;
    unsigned max_threads;
    unsigned per_thread;
@@ -56,7 +54,7 @@ static void upload_wm_unit(struct brw_context *brw )
    memset(&wm, 0, sizeof(wm));
 
    /* CACHE_NEW_WM_PROG */
-   wm.thread0.grf_reg_count = ALIGN(brw->wm.prog_data->total_grf, 16) / 16 - 1;
+   wm.thread0.grf_reg_count = align(brw->wm.prog_data->total_grf, 16) / 16 - 1;
    wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6;
    wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
    wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length;
@@ -64,9 +62,10 @@ static void upload_wm_unit(struct brw_context *brw )
 
    wm.wm5.max_threads = max_threads;
 
-   per_thread = ALIGN(brw->wm.prog_data->total_scratch, 1024);
+   per_thread = align(brw->wm.prog_data->total_scratch, 1024);
    assert(per_thread <= 12 * 1024);
 
+#if 0
    if (brw->wm.prog_data->total_scratch) {
       unsigned total = per_thread * (max_threads + 1);
 
@@ -95,6 +94,7 @@ static void upload_wm_unit(struct brw_context *brw )
     * so just fail for now if we hit that path.
     */
    assert(brw->wm.prog_data->total_scratch == 0);
+#endif
 
    /* CACHE_NEW_SURFACE */
    wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces;
@@ -112,23 +112,20 @@ static void upload_wm_unit(struct brw_context *brw )
 
    /* BRW_NEW_FRAGMENT_PROGRAM */
    {
-      const struct gl_fragment_program *fp = brw->fragment_program;
+      const struct brw_fragment_program *fp = brw->attribs.FragmentProgram;
 
-      if (fp->Base.InputsRead & (1<<FRAG_ATTRIB_WPOS))
+      if (fp->UsesDepth)
         wm.wm5.program_uses_depth = 1; /* as far as we can tell */
 
-      if (fp->Base.OutputsWritten & (1<<FRAG_RESULT_DEPR))
+      if (fp->ComputesDepth)
         wm.wm5.program_computes_depth = 1;
 
-      /* _NEW_COLOR */
+      /* BRW_NEW_ALPHA_TEST */
       if (fp->UsesKill ||
-         brw->attribs.Color->AlphaEnabled)
+         brw->attribs.AlphaTest->enabled)
         wm.wm5.program_uses_killpixel = 1;
 
-      if (brw_wm_is_glsl(fp))
-         wm.wm5.enable_8_pix = 1;
-      else
-         wm.wm5.enable_16_pix = 1;
+      wm.wm5.enable_8_pix = 1;
    }
 
    wm.wm5.thread_dispatch_enable = 1;  /* AKA: color_write */
@@ -138,11 +135,11 @@ static void upload_wm_unit(struct brw_context *brw )
    wm.wm5.line_aa_region_width = 0;
    wm.wm5.line_endcap_aa_region_width = 1;
 
-   /* _NEW_POLYGONSTIPPLE */
-   if (brw->attribs.Polygon->StippleFlag)
+   /* BRW_NEW_RASTERIZER */
+   if (brw->attribs.Raster->poly_stipple_enable)
       wm.wm5.polygon_stipple = 1;
 
-   /* _NEW_POLYGON */
+#if 0
    if (brw->attribs.Polygon->OffsetFill) {
       wm.wm5.depth_offset = 1;
       /* Something wierd going on with legacy_global_depth_bias,
@@ -156,13 +153,13 @@ static void upload_wm_unit(struct brw_context *brw )
        */
       wm.global_depth_offset_scale = brw->attribs.Polygon->OffsetFactor;
    }
+#endif
 
-   /* _NEW_LINE */
-   if (brw->attribs.Line->StippleFlag) {
+   if (brw->attribs.Raster->line_stipple_enable) {
       wm.wm5.line_stipple = 1;
    }
 
-   if (BRW_DEBUG & DEBUG_STATS || intel->stats_wm)
+   if (BRW_DEBUG & DEBUG_STATS)
       wm.wm4.stats_enable = 1;
 
    brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm );
@@ -183,14 +180,10 @@ static void upload_wm_unit(struct brw_context *brw )
 
 const struct brw_tracked_state brw_wm_unit = {
    .dirty = {
-      .mesa = (_NEW_POLYGON |
-              _NEW_POLYGONSTIPPLE |
-              _NEW_LINE |
-              _NEW_COLOR),
-
-      .brw = (BRW_NEW_FRAGMENT_PROGRAM |
-             BRW_NEW_CURBE_OFFSETS |
-             BRW_NEW_LOCK),
+      .brw = (BRW_NEW_RASTERIZER |
+             BRW_NEW_ALPHA_TEST |
+             BRW_NEW_FS |
+             BRW_NEW_CURBE_OFFSETS),
 
       .cache = (CACHE_NEW_SURFACE |
                CACHE_NEW_WM_PROG |
@@ -199,4 +192,3 @@ const struct brw_tracked_state brw_wm_unit = {
    .update = upload_wm_unit
 };
 
-#endif
index e6d284d9327deda89323753ab0425c9f254d68dc..46edcf307530493c4364f5e3c05555a0d9467c5b 100644 (file)
@@ -360,6 +360,11 @@ static INLINE float LOG2(float val)
 #define CEILF(x)   ((float) ceil(x))
 #endif
 
+static INLINE int align(int value, int alignment)
+{
+   return (value + alignment - 1) & ~(alignment - 1);
+}
+
 /* Convenient...
  */
 extern void _mesa_printf(const char *str, ...);
index 2dd1add6f7b1d717c201b9106ec5820d37df84f4..44512e4281eff864fa49351ce45eccc50581ae0b 100644 (file)
@@ -52,10 +52,6 @@ static unsigned minify( unsigned d )
    return MAX2(1, d>>1);
 }
 
-static int align(int value, int alignment)
-{
-   return (value + alignment - 1) & ~(alignment - 1);
-}
 
 
 static void