i965g: work in progress on fragment shaders
author Keith Whitwell <keithw@vmware.com>
Thu, 29 Oct 2009 20:18:01 +0000 (20:18 +0000)
committer Keith Whitwell <keithw@vmware.com>
Thu, 29 Oct 2009 20:18:01 +0000 (20:18 +0000)
18 files changed:
src/gallium/drivers/i965/brw_context.h
src/gallium/drivers/i965/brw_eu.c
src/gallium/drivers/i965/brw_eu.h
src/gallium/drivers/i965/brw_pipe_depth.c
src/gallium/drivers/i965/brw_pipe_rast.c
src/gallium/drivers/i965/brw_pipe_rast.h
src/gallium/drivers/i965/brw_pipe_shader.c
src/gallium/drivers/i965/brw_screen.h
src/gallium/drivers/i965/brw_vs_emit.c
src/gallium/drivers/i965/brw_wm.c
src/gallium/drivers/i965/brw_wm.h
src/gallium/drivers/i965/brw_wm_debug.c
src/gallium/drivers/i965/brw_wm_emit.c
src/gallium/drivers/i965/brw_wm_fp.c
src/gallium/drivers/i965/brw_wm_glsl.c
src/gallium/drivers/i965/brw_wm_pass0.c
src/gallium/drivers/i965/brw_wm_pass1.c
src/gallium/drivers/i965/brw_wm_state.c

index 7b85363e9f05e69040a8bfb903739cef296ae01c..e6c31610669f641e760fb383b1ac9e6afb0da00b 100644 (file)
@@ -132,6 +132,8 @@ struct brw_depth_stencil_state {
    struct brw_cc2 cc2;
    struct brw_cc3 cc3;
    struct brw_cc7 cc7;
+
+   unsigned iz_lookup;
 };
 
 
@@ -164,7 +166,10 @@ struct brw_fragment_shader {
    const struct tgsi_token *tokens;
    struct tgsi_shader_info info;
 
-   GLboolean isGLSL;
+   unsigned iz_lookup;
+   
+   boolean  uses_depth:1;
+   boolean  has_flow_control:1;
 
    unsigned id;
    struct brw_winsys_buffer *const_buffer;    /** Program constant buffer/surface */
@@ -194,6 +199,7 @@ struct brw_fragment_shader {
 #define PIPE_NEW_COLOR_BUFFERS          0x40000
 #define PIPE_NEW_QUERY                  0x80000
 #define PIPE_NEW_SCISSOR                0x100000
+#define PIPE_NEW_BOUND_TEXTURES         0x200000
 
 
 
@@ -487,7 +493,7 @@ struct brw_context
       const struct brw_rasterizer_state *rast;
       const struct brw_depth_stencil_state *zstencil;
 
-      const struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+      const struct brw_texture *texture[PIPE_MAX_SAMPLERS];
       const struct pipe_sampler *sampler[PIPE_MAX_SAMPLERS];
       unsigned num_textures;
       unsigned num_samplers;
index 1189a35b6f2e70bef171187a4102e8dfebabe30e..de43b14512d785bc59abbb148e3bfb75603a4601 100644 (file)
@@ -150,22 +150,22 @@ const GLuint *brw_get_program( struct brw_compile *p,
 /**
  * For each OPCODE_BGNSUB we create one of these.
  */
-struct brw_glsl_label
+struct brw_eu_label
 {
    GLuint label;     /**< the label number */
    GLuint position;  /**< the position of the brw instruction for this label */
-   struct brw_glsl_label *next;  /**< next in linked list */
+   struct brw_eu_label *next;  /**< next in linked list */
 };
 
 
 /**
  * For each OPCODE_CAL we create one of these.
  */
-struct brw_glsl_call
+struct brw_eu_call
 {
    GLuint call_inst_pos;  /**< location of the CAL instruction */
    GLuint label;
-   struct brw_glsl_call *next;  /**< next in linked list */
+   struct brw_eu_call *next;  /**< next in linked list */
 };
 
 
@@ -175,7 +175,7 @@ struct brw_glsl_call
 void
 brw_save_label(struct brw_compile *c, unsigned l, GLuint position)
 {
-   struct brw_glsl_label *label = CALLOC_STRUCT(brw_glsl_label);
+   struct brw_eu_label *label = CALLOC_STRUCT(brw_eu_label);
    label->label = l;
    label->position = position;
    label->next = c->first_label;
@@ -189,7 +189,7 @@ brw_save_label(struct brw_compile *c, unsigned l, GLuint position)
 void
 brw_save_call(struct brw_compile *c, GLuint label, GLuint call_pos)
 {
-   struct brw_glsl_call *call = CALLOC_STRUCT(brw_glsl_call);
+   struct brw_eu_call *call = CALLOC_STRUCT(brw_eu_call);
    call->call_inst_pos = call_pos;
    call->label = label;
    call->next = c->first_call;
@@ -203,7 +203,7 @@ brw_save_call(struct brw_compile *c, GLuint label, GLuint call_pos)
 static GLuint
 brw_lookup_label(struct brw_compile *c, unsigned l)
 {
-   const struct brw_glsl_label *label;
+   const struct brw_eu_label *label;
    for (label = c->first_label; label; label = label->next) {
       if (l == label->label) {
          return label->position;
@@ -221,7 +221,7 @@ brw_lookup_label(struct brw_compile *c, unsigned l)
 void
 brw_resolve_cals(struct brw_compile *c)
 {
-    const struct brw_glsl_call *call;
+    const struct brw_eu_call *call;
 
     for (call = c->first_call; call; call = call->next) {
         const GLuint sub_loc = brw_lookup_label(c, call->label);
@@ -235,7 +235,7 @@ brw_resolve_cals(struct brw_compile *c)
 
     /* free linked list of calls */
     {
-        struct brw_glsl_call *call, *next;
+        struct brw_eu_call *call, *next;
         for (call = c->first_call; call; call = next) {
            next = call->next;
            FREE(call);
@@ -245,7 +245,7 @@ brw_resolve_cals(struct brw_compile *c)
 
     /* free linked list of labels */
     {
-        struct brw_glsl_label *label, *next;
+        struct brw_eu_label *label, *next;
        for (label = c->first_label; label; label = next) {
            next = label->next;
            FREE(label);
index 3379522104a02895711616d460738a7af0963c7a..7bddc3859ca240a1b79dd7dde6f95460320c8d95 100644 (file)
@@ -109,8 +109,8 @@ struct brw_indirect {
 };
 
 
-struct brw_glsl_label;
-struct brw_glsl_call;
+struct brw_eu_label;
+struct brw_eu_call;
 
 
 
@@ -130,8 +130,8 @@ struct brw_compile {
    GLboolean single_program_flow;
    struct brw_context *brw;
 
-   struct brw_glsl_label *first_label;  /**< linked list of labels */
-   struct brw_glsl_call *first_call;    /**< linked list of CALs */
+   struct brw_eu_label *first_label;  /**< linked list of labels */
+   struct brw_eu_call *first_call;    /**< linked list of CALs */
 };
 
 
index 33fe517e0b81e5f414df85884271a7d2b553118e..e010d76e0d3d680e9fa3a583b93fec54671b830a 100644 (file)
@@ -5,6 +5,10 @@
 #include "brw_context.h"
 #include "brw_defines.h"
 
+/* XXX: Fixme - include this to get IZ_ defines
+ */
+#include "brw_wm.h"
+
 static unsigned brw_translate_compare_func(unsigned func)
 {
    switch (func) {
@@ -55,13 +59,9 @@ static unsigned translate_stencil_op(unsigned op)
    }
 }
 
-
-static void *
-brw_create_depth_stencil_state( struct pipe_context *pipe,
-                               const struct pipe_depth_stencil_alpha_state *templ )
+static void create_bcc_state( struct brw_depth_stencil_state *zstencil,
+                             const struct pipe_depth_stencil_alpha_state *templ )
 {
-   struct brw_depth_stencil_state *zstencil = CALLOC_STRUCT(brw_depth_stencil_state);
-
    if (templ->stencil[0].enabled) {
       zstencil->cc0.stencil_enable = 1;
       zstencil->cc0.stencil_func =
@@ -108,6 +108,43 @@ brw_create_depth_stencil_state( struct pipe_context *pipe,
       zstencil->cc2.depth_test_function = brw_translate_compare_func(templ->depth.func);
       zstencil->cc2.depth_write_enable = templ->depth.writemask;
    }
+}
+
+/* Fold the enable bits already written to the cc* words into iz_lookup. */
+static void create_wm_iz_state( struct brw_depth_stencil_state *zstencil )
+{
+   if (zstencil->cc3.alpha_test)
+      zstencil->iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+   if (zstencil->cc2.depth_test)
+      zstencil->iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
+
+   if (zstencil->cc2.depth_write_enable)
+      zstencil->iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+
+   if (zstencil->cc0.stencil_enable)
+      zstencil->iz_lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
+
+   if (zstencil->cc0.stencil_write_enable)
+      zstencil->iz_lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
+
+}
+
+
+/* Allocate a brw_depth_stencil_state and fill it from templ.
+ * Returns NULL if the allocation fails.
+ */
+static void *
+brw_create_depth_stencil_state( struct pipe_context *pipe,
+                               const struct pipe_depth_stencil_alpha_state *templ )
+{
+   struct brw_depth_stencil_state *zstencil = CALLOC_STRUCT(brw_depth_stencil_state);
+
+   if (zstencil == NULL)
+      return NULL;
+
+   create_bcc_state( zstencil, templ );
+   create_wm_iz_state( zstencil );
 
    return (void *)zstencil;
 }
index 86822d478a5bd718b3599c6f461ae65951c3384d..51159bf14711950b787c6761aff75556a32e488c 100644 (file)
@@ -64,3 +64,21 @@ calculate_line_stipple_rast()
    bls.bits1.inverse_repeat_count = tmpi;
 
 }
+
+
+
+static void
+calculate_wm_lookup()
+{
+   if (rast->fill_cw == PIPE_POLYGON_MODE_LINE &&
+       rast->fill_ccw == PIPE_POLYGON_MODE_LINE) {
+      line_aa = AA_ALWAYS;
+   }
+   else if (rast->fill_cw == PIPE_POLYGON_MODE_LINE ||
+           rast->fill_ccw == PIPE_POLYGON_MODE_LINE) {
+      line_aa = AA_SOMETIMES;
+   }
+   else {
+      line_aa = AA_NEVER;
+   }
+}
index 800a9208a7ca304ace3c1558290505eb9d210e39..9354f01e18a15f0f8dbd9696c1262dbb340590af 100644 (file)
@@ -10,6 +10,7 @@ struct brw_rasterizer_state {
     */
    struct brw_clip_prog_key clip_key;
    struct brw_line_stipple bls;
+   unsigned unfilled_aa_line;
 };
 
 #endif
index 8b61da763cd02760d81c6c49af595391ba3bc63f..6e37eac634344484df0e0e1a0445def0a9990463 100644 (file)
@@ -39,7 +39,7 @@
  * as flow conditionals, loops, subroutines.
  * Some GLSL shaders may use these features, others might not.
  */
-GLboolean brw_wm_is_glsl(const struct brw_fragment_shader *fp)
+GLboolean brw_wm_has_flow_control(const struct brw_fragment_shader *fp)
 {
     return (fp->info.insn_count[TGSI_OPCODE_ARL] > 0 ||
            fp->info.insn_count[TGSI_OPCODE_IF] > 0 ||
@@ -144,7 +144,7 @@ static void brwProgramStringNotify( struct brw_context *brw,
       if (newFP == curFP)
         brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
       newFP->id = brw->program_id++;      
-      newFP->isGLSL = brw_wm_is_glsl(fprog);
+      newFP->has_flow_control = brw_wm_has_flow_control(fprog);
    }
    else if (target == GL_VERTEX_PROGRAM_ARB) {
       struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
index eafd8ddf77e53d4cd32ca2e81e7fc929c478361c..efa27db1e0f554082671ee1829fd54f4b0307eaa 100644 (file)
@@ -64,6 +64,13 @@ struct brw_buffer
    boolean is_user_buffer;
 };
 
+struct brw_texture
+{
+   struct pipe_texture base;
+
+   ubyte shader_swizzle;
+};
+
 
 /*
  * Cast wrappers
index 6809bccdecc442c2b8284d81f06c6d797498f2ff..bcc5c5f71300635cf47cbd507298702b02059f75 100644 (file)
@@ -1013,8 +1013,6 @@ static struct brw_reg get_arg( struct brw_vs_compile *c,
                                       src->SrcRegister.SwizzleZ,
                                       src->SrcRegister.SwizzleW);
 
-   /* Note this is ok for non-swizzle instructions: 
-    */
    reg.negate = src->SrcRegister.Negate ? 1 : 0;   
 
    /* XXX: abs, absneg
index f0dabfcfd0e632dd76b949e13d8823188f3dd8f7..33602b59c1b1584b1dd2078eda804807c8629ef1 100644 (file)
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
+#include "pipe/p_error.h"
 
 #include "tgsi/tgsi_info.h"
 
 #include "brw_context.h"
+#include "brw_screen.h"
 #include "brw_util.h"
 #include "brw_wm.h"
 #include "brw_state.h"
 #include "brw_debug.h"
+#include "brw_pipe_rast.h"
 
 
 /** Return number of src args for given instruction */
@@ -85,12 +88,12 @@ GLuint brw_wm_is_scalar_result( GLuint opcode )
 
 
 /**
- * Do GPU code generation for non-GLSL shader.  non-GLSL shaders have
- * no flow control instructions so we can more readily do SSA-style
- * optimizations.
+ * Do GPU code generation for shaders without flow control.  Shaders
+ * without flow control instructions can more readily be analysed for
+ * SSA-style optimizations.
  */
 static void
-brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
+brw_wm_linear_shader_emit(struct brw_context *brw, struct brw_wm_compile *c)
 {
    /* Augment fragment program.  Add instructions for pre- and
     * post-fragment-program tasks such as interpolation and fogging.
@@ -136,7 +139,7 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
  * Depending on the instructions used (i.e. flow control instructions)
  * we'll use one of two code generators.
  */
-static void do_wm_prog( struct brw_context *brw,
+static int do_wm_prog( struct brw_context *brw,
                        struct brw_fragment_shader *fp, 
                        struct brw_wm_prog_key *key)
 {
@@ -153,7 +156,7 @@ static void do_wm_prog( struct brw_context *brw,
           * without triggering a segfault, no way to signal,
           * so just return.
           */
-         return;
+         return PIPE_ERROR_OUT_OF_MEMORY;
       }
    } else {
       memset(c, 0, sizeof(*brw->wm.compile_data));
@@ -166,19 +169,19 @@ static void do_wm_prog( struct brw_context *brw,
    brw_init_compile(brw, &c->func);
 
    /* temporary sanity check assertion */
-   assert(fp->isGLSL == brw_wm_is_glsl(&c->fp->program));
+   assert(fp->has_flow_control == brw_wm_has_flow_control(c->fp));
 
    /*
     * Shader which use GLSL features such as flow control are handled
     * differently from "simple" shaders.
     */
-   if (fp->isGLSL) {
+   if (fp->has_flow_control) {
       c->dispatch_width = 8;
-      brw_wm_glsl_emit(brw, c);
+      brw_wm_branching_shader_emit(brw, c);
    }
    else {
       c->dispatch_width = 16;
-      brw_wm_non_glsl_emit(brw, c);
+      brw_wm_linear_shader_emit(brw, c);
    }
 
    if (BRW_DEBUG & DEBUG_WM)
@@ -195,6 +198,8 @@ static void do_wm_prog( struct brw_context *brw,
                                       program, program_size,
                                       &c->prog_data,
                                       &brw->wm.prog_data );
+
+   return 0;
 }
 
 
@@ -202,71 +207,36 @@ static void do_wm_prog( struct brw_context *brw,
 static void brw_wm_populate_key( struct brw_context *brw,
                                 struct brw_wm_prog_key *key )
 {
-   /* BRW_NEW_FRAGMENT_PROGRAM */
-   const struct brw_fragment_program *fp = brw->curr.fragment_shader;
-   GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
-   GLuint lookup = 0;
-   GLuint line_aa;
-   GLuint i;
+   unsigned lookup, line_aa;
+   unsigned i;
 
    memset(key, 0, sizeof(*key));
 
-   /* Build the index for table lookup
+   /* PIPE_NEW_FRAGMENT_SHADER
+    * PIPE_NEW_DEPTH_STENCIL_ALPHA
     */
-   /* _NEW_COLOR */
-   if (fp->program.UsesKill ||
-       ctx->Color.AlphaEnabled)
-      lookup |= IZ_PS_KILL_ALPHATEST_BIT;
-
-   if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPTH))
-      lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
-
-   /* _NEW_DEPTH */
-   if (ctx->Depth.Test)
-      lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
-
-   if (ctx->Depth.Test &&  
-       ctx->Depth.Mask) /* ?? */
-      lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+   lookup = (brw->curr.zstencil->iz_lookup |
+            brw->curr.fragment_shader->iz_lookup);
 
-   /* _NEW_STENCIL */
-   if (ctx->Stencil._Enabled) {
-      lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
 
-      if (ctx->Stencil.WriteMask[0] ||
-         ctx->Stencil.WriteMask[ctx->Stencil._BackFace])
-        lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
-   }
-
-   line_aa = AA_NEVER;
-
-   /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
-   if (ctx->Line.SmoothFlag) {
-      if (brw->intel.reduced_primitive == GL_LINES) {
-        line_aa = AA_ALWAYS;
-      }
-      else if (brw->intel.reduced_primitive == GL_TRIANGLES) {
-        if (ctx->Polygon.FrontMode == GL_LINE) {
-           line_aa = AA_SOMETIMES;
-
-           if (ctx->Polygon.BackMode == GL_LINE ||
-               (ctx->Polygon.CullFlag &&
-                ctx->Polygon.CullFaceMode == GL_BACK))
-              line_aa = AA_ALWAYS;
-        }
-        else if (ctx->Polygon.BackMode == GL_LINE) {
-           line_aa = AA_SOMETIMES;
-
-           if ((ctx->Polygon.CullFlag &&
-                ctx->Polygon.CullFaceMode == GL_FRONT))
-              line_aa = AA_ALWAYS;
-        }
-      }
+   /* PIPE_NEW_RAST
+    * BRW_NEW_REDUCED_PRIMITIVE 
+    */
+   switch (brw->reduced_primitive) {
+   case PIPE_PRIM_POINTS:
+      line_aa = AA_NEVER;
+      break;
+   case PIPE_PRIM_LINES:
+      line_aa = AA_ALWAYS;
+      break;
+   default:
+      line_aa = brw->curr.rast->unfilled_aa_line;
+      break;
    }
         
    brw_wm_lookup_iz(line_aa,
                    lookup,
-                   uses_depth,
+                   brw->curr.fragment_shader->uses_depth,
                    key);
 
    /* Revisit this, figure out if it's really useful, and either push
@@ -276,54 +246,39 @@ static void brw_wm_populate_key( struct brw_context *brw,
    key->proj_attrib_mask = ~0; /*brw->wm.input_size_masks[4-1];*/
 
    /* PIPE_NEW_RAST */
-   key->flat_shade = brw->rast.flat_shade;
+   key->flat_shade = brw->curr.rast->templ.flatshade;
 
    /* This can be determined by looking at the INTERP mode each input decl.
     */
-   key->linear_color = 0;
-
-   /* _NEW_TEXTURE */
-   for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
-      if (i < brw->nr_textures) {
-        const struct gl_texture_unit *unit = &ctx->Texture.Unit[i];
-        const struct gl_texture_object *t = unit->_Current;
-        const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
-        
-        if (img->InternalFormat == GL_YCBCR_MESA) {
-           key->yuvtex_mask |= 1 << i;
-           if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR)
-              key->yuvtex_swap_mask |= 1 << i;
-        }
+   key->linear_attrib_mask = 0;
 
-        key->tex_swizzles[i] = t->_Swizzle;
+   /* PIPE_NEW_BOUND_TEXTURES */
+   for (i = 0; i < brw->curr.num_textures; i++) {
+      const struct brw_texture *tex = brw->curr.texture[i];
         
-        if (0)
-           key->shadowtex_mask |= 1<<i;
-      }
-      else {
-         key->tex_swizzles[i] = SWIZZLE_NOOP;
-      }
-   }
+      if (tex->base.format == PIPE_FORMAT_YCBCR)
+        key->yuvtex_mask |= 1 << i;
 
+      if (tex->base.format == PIPE_FORMAT_YCBCR_REV)
+        key->yuvtex_swap_mask |= 1 << i;
 
-   /* _NEW_FRAMEBUFFER */
-   if (brw->intel.driDrawable != NULL) {
-      key->drawable_height = brw->fb.cbufs[0].height;
+      /* XXX: shadow texture
+       */
+      /* key->shadowtex_mask |= 1<<i; */
    }
 
    /* CACHE_NEW_VS_PROG */
-   key->vp_nr_outputs_written = brw->vs.prog_data->nr_outputs_written;
+   key->vp_nr_outputs = brw->vs.prog_data->nr_outputs;
 
    /* The unique fragment program ID */
-   key->program_string_id = fp->id;
+   key->program_string_id = brw->curr.fragment_shader->id;
 }
 
 
-static void brw_prepare_wm_prog(struct brw_context *brw)
+static int brw_prepare_wm_prog(struct brw_context *brw)
 {
    struct brw_wm_prog_key key;
-   struct brw_fragment_program *fp = (struct brw_fragment_program *)
-      brw->fragment_program;
+   struct brw_fragment_shader *fs = brw->curr.fragment_shader;
      
    brw_wm_populate_key(brw, &key);
 
@@ -335,23 +290,19 @@ static void brw_prepare_wm_prog(struct brw_context *brw)
                                      NULL, 0,
                                      &brw->wm.prog_data);
    if (brw->wm.prog_bo == NULL)
-      do_wm_prog(brw, fp, &key);
+      return do_wm_prog(brw, fs, &key);
+
+   return 0;
 }
 
 
 const struct brw_tracked_state brw_wm_prog = {
    .dirty = {
-      .mesa  = (_NEW_COLOR |
-               _NEW_DEPTH |
-                _NEW_HINT |
-               _NEW_STENCIL |
-               _NEW_POLYGON |
-               _NEW_LINE |
-               _NEW_LIGHT |
-               _NEW_BUFFERS |
-               _NEW_TEXTURE),
-      .brw   = (BRW_NEW_FRAGMENT_PROGRAM |
-               BRW_NEW_WM_INPUT_DIMENSIONS |
+      .mesa  = (PIPE_NEW_FRAGMENT_SHADER |
+               PIPE_NEW_DEPTH_STENCIL_ALPHA |
+               PIPE_NEW_RAST |
+               PIPE_NEW_BOUND_TEXTURES),
+      .brw   = (BRW_NEW_WM_INPUT_DIMENSIONS |
                BRW_NEW_REDUCED_PRIMITIVE),
       .cache = CACHE_NEW_VS_PROG,
    },
index 084430cf282a8b5fba0b1f09d9880f348824166e..2cd5bb708186a4ae4a6b07606b8f61afa276927d 100644 (file)
@@ -33,9 +33,6 @@
 #ifndef BRW_WM_H
 #define BRW_WM_H
 
-#include "tgsi/tgsi_ureg.h"
-#include "tgsi/tgsi_ureg_parse.h"
-
 #include "brw_context.h"
 #include "brw_eu.h"
 
@@ -59,8 +56,8 @@
 #define AA_ALWAYS    2
 
 struct brw_wm_prog_key {
-   unsigned proj_attrib_mask; /**< one bit per fragment program attribute */
-   unsigned linear_attrib_mask:1;  /**< linear interpolation vs perspective interp */
+   unsigned proj_attrib_mask;    /**< one bit per fragment program attribute */
+   unsigned linear_attrib_mask;  /**< linear interpolation vs perspective interp */
 
    GLuint source_depth_reg:3;
    GLuint aa_dest_stencil_reg:3;
@@ -75,11 +72,10 @@ struct brw_wm_prog_key {
    GLuint yuvtex_mask:16;
    GLuint yuvtex_swap_mask:16; /* UV swaped */
 
-   GLuint tex_swizzles[BRW_MAX_TEX_UNIT];
-
-   GLuint program_string_id:32;
+   GLuint vp_nr_outputs:6;
+   GLuint nr_cbufs:3;
 
-   GLuint vp_nr_outputs_written;
+   GLuint program_string_id;
 };
 
 
@@ -146,9 +142,8 @@ struct brw_wm_instruction {
    GLuint opcode:8;
    GLuint saturate:1;
    GLuint writemask:4;
-   GLuint tex_unit:4;   /* texture unit for TEX, TXD, TXP instructions */
-   GLuint tex_idx:3;    /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */
-   GLuint tex_shadow:1; /* do shadow comparison? */
+   GLuint tex_unit:4;   /* texture/sampler unit for texture instructions */
+   GLuint tex_target:4; /* TGSI_TEXTURE_x for texture instructions*/
    GLuint eot:1;       /* End of thread indicator for FB_WRITE*/
    GLuint target:10;    /* target binding table index for FB_WRITE*/
 };
@@ -180,15 +175,17 @@ struct brw_wm_instruction {
 #define WM_FRONTFACING    (MAX_OPCODE + 8)
 #define MAX_WM_OPCODE     (MAX_OPCODE + 9)
 
-#define PROGRAM_PAYLOAD   (TGSI_FILE_COUNT)
-#define PAYLOAD_DEPTH     (FRAG_ATTRIB_MAX)
+#define BRW_FILE_PAYLOAD   (TGSI_FILE_COUNT)
+#define PAYLOAD_DEPTH      (FRAG_ATTRIB_MAX) /* ?? */
+
+struct brw_passfp_program;
 
 struct brw_wm_compile {
    struct brw_compile func;
    struct brw_wm_prog_key key;
    struct brw_wm_prog_data prog_data;
 
-   struct brw_fragment_program *fp;
+   struct brw_fragment_shader *fp;
 
    GLfloat (*env_param)[4];
 
@@ -201,15 +198,7 @@ struct brw_wm_compile {
     * simplifying and adding instructions for interpolation and
     * framebuffer writes.
     */
-   struct ureg_instruction prog_instructions[BRW_WM_MAX_INSN];
-   GLuint nr_fp_insns;
-   GLuint fp_temp;
-   GLuint fp_interp_emitted;
-   GLuint fp_fragcolor_emitted;
-
-   struct ureg_src pixel_xy;
-   struct ureg_src delta_xy;
-   struct ureg_src pixel_w;
+   struct brw_passfp_program *pass_fp;
 
 
    struct brw_wm_value vreg[BRW_WM_MAX_VREG];
@@ -298,8 +287,8 @@ void brw_wm_lookup_iz( GLuint line_aa,
                       GLboolean ps_uses_depth,
                       struct brw_wm_prog_key *key );
 
-//GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
-void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);
+GLboolean brw_wm_has_flow_control(const struct brw_fragment_shader *fp);
+void brw_wm_branching_shader_emit(struct brw_context *brw, struct brw_wm_compile *c);
 
 void emit_ddxy(struct brw_compile *p,
               const struct brw_reg *dst,
index 04dec5ba392209cbd5925a6d90e134d7ff49d609..65d7626eea9d86f1814be092905aedbe5b17e6be 100644 (file)
@@ -28,7 +28,8 @@
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
-               
+
+#include "tgsi/tgsi_info.h"
 
 #include "brw_context.h"
 #include "brw_wm.h"
@@ -49,10 +50,10 @@ void brw_wm_print_value( struct brw_wm_compile *c,
            value - c->creg < BRW_WM_MAX_PARAM)
       debug_printf("c%d", value - c->creg);
    else if (value - c->payload.input_interp >= 0 &&
-           value - c->payload.input_interp < FRAG_ATTRIB_MAX)
+           value - c->payload.input_interp < PIPE_MAX_SHADER_INPUTS)
       debug_printf("i%d", value - c->payload.input_interp);
    else if (value - c->payload.depth >= 0 &&
-           value - c->payload.depth < FRAG_ATTRIB_MAX)
+           value - c->payload.depth < PIPE_MAX_SHADER_INPUTS)
       debug_printf("d%d", value - c->payload.depth);
    else 
       debug_printf("?");
@@ -100,10 +101,10 @@ void brw_wm_print_insn( struct brw_wm_compile *c,
 
    if (inst->writemask != BRW_WRITEMASK_XYZW)
       debug_printf(".%s%s%s%s", 
-                  GET_BIT(inst->writemask, 0) ? "x" : "",
-                  GET_BIT(inst->writemask, 1) ? "y" : "",
-                  GET_BIT(inst->writemask, 2) ? "z" : "",
-                  GET_BIT(inst->writemask, 3) ? "w" : "");
+                  (inst->writemask & BRW_WRITEMASK_X) ? "x" : "",
+                  (inst->writemask & BRW_WRITEMASK_Y) ? "y" : "",
+                  (inst->writemask & BRW_WRITEMASK_Z) ? "z" : "",
+                  (inst->writemask & BRW_WRITEMASK_W) ? "w" : "");
 
    switch (inst->opcode) {
    case WM_PIXELXY:
@@ -134,7 +135,7 @@ void brw_wm_print_insn( struct brw_wm_compile *c,
       debug_printf(" = FRONTFACING");
       break;
    default:
-      debug_printf(" = %s", _mesa_opcode_string(inst->opcode));
+      debug_printf(" = %s", tgsi_get_opcode_info(inst->opcode)->mnemonic);
       break;
    }
 
index 5f7ae6592c6b7d8d53e50238d3817fe6b841c8af..a705d8b34490013af3dd838346e78abe33514374 100644 (file)
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
-               
+
+#include "util/u_math.h"
+#include "tgsi/tgsi_info.h"
 
 #include "brw_context.h"
 #include "brw_wm.h"
+#include "brw_debug.h"
 
 /* Not quite sure how correct this is - need to understand horiz
  * vs. vertical strides a little better.
@@ -45,15 +48,15 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg )
 
 /* Payload R0:
  *
- * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
+ * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 quads,
  *         corresponding to each of the 16 execution channels.
  * R0.1..8 -- ?
  * R1.0 -- triangle vertex 0.X
  * R1.1 -- triangle vertex 0.Y
- * R1.2 -- tile 0 x,y coords (2 packed uwords)
- * R1.3 -- tile 1 x,y coords (2 packed uwords)
- * R1.4 -- tile 2 x,y coords (2 packed uwords)
- * R1.5 -- tile 3 x,y coords (2 packed uwords)
+ * R1.2 -- quad 0 x,y coords (2 packed uwords)
+ * R1.3 -- quad 1 x,y coords (2 packed uwords)
+ * R1.4 -- quad 2 x,y coords (2 packed uwords)
+ * R1.5 -- quad 3 x,y coords (2 packed uwords)
  * R1.6 -- ?
  * R1.7 -- ?
  * R1.8 -- ?
@@ -134,11 +137,17 @@ static void emit_wpos_xy(struct brw_wm_compile *c,
    /* XXX: is this needed any more, or is this a NOOP?
     */
    if (mask & BRW_WRITEMASK_Y) {
+#if 0
       /* Y' = height - 1 - Y */
       brw_ADD(p,
              dst[1],
              negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
              brw_imm_d(c->key.drawable_height - 1));
+#else
+      /* Y' = Y: pass window Y through unflipped into the Y channel */
+      brw_MOV(p, dst[1],
+             retype(arg0[1], BRW_REGISTER_TYPE_W));
+#endif
    }
 }
 
@@ -279,28 +288,28 @@ static void emit_frontfacing( struct brw_compile *p,
 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
  * looking like:
  *
- * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
+ * arg0: q0.tl q0.tr q0.bl q0.br q1.tl q1.tr q1.bl q1.br
  *
  * and we're trying to produce:
  *
  *           DDX                     DDY
- * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
- *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
- *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
- *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
- *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
- *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
- *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
- *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
+ * dst: (q0.tr - q0.tl)     (q0.tl - q0.bl)
+ *      (q0.tr - q0.tl)     (q0.tr - q0.br)
+ *      (q0.br - q0.bl)     (q0.tl - q0.bl)
+ *      (q0.br - q0.bl)     (q0.tr - q0.br)
+ *      (q1.tr - q1.tl)     (q1.tl - q1.bl)
+ *      (q1.tr - q1.tl)     (q1.tr - q1.br)
+ *      (q1.br - q1.bl)     (q1.tl - q1.bl)
+ *      (q1.br - q1.bl)     (q1.tr - q1.br)
  *
- * and add another set of two more subspans if in 16-pixel dispatch mode.
+ * and add two more quads if in 16-pixel dispatch mode.
  *
  * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
  * for each pair, and vertstride = 2 jumps us 2 elements after processing a
  * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
  * between each other.  We could probably do it like ddx and swizzle the right
  * order later, but bail for now and just produce
- * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
+ * ((q0.tl - q0.bl)x4 (q1.tl - q1.bl)x4)
  */
 void emit_ddxy(struct brw_compile *p,
               const struct brw_reg *dst,
@@ -611,12 +620,12 @@ static void emit_dp3( struct brw_compile *p,
                      const struct brw_reg *arg0,
                      const struct brw_reg *arg1 )
 {
-   int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1;
+   int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 
    if (!(mask & BRW_WRITEMASK_XYZW))
       return; /* Do not emit dead code */
 
-   assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW));
+   assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 
    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
@@ -633,12 +642,12 @@ static void emit_dp4( struct brw_compile *p,
                      const struct brw_reg *arg0,
                      const struct brw_reg *arg1 )
 {
-   int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1;
+   int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 
    if (!(mask & BRW_WRITEMASK_XYZW))
       return; /* Do not emit dead code */
 
-   assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW));
+   assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 
    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
@@ -656,12 +665,12 @@ static void emit_dph( struct brw_compile *p,
                      const struct brw_reg *arg0,
                      const struct brw_reg *arg1 )
 {
-   const int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1;
+   const int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 
    if (!(mask & BRW_WRITEMASK_XYZW))
       return; /* Do not emit dead code */
 
-   assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW));
+   assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 
    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
@@ -704,12 +713,12 @@ static void emit_math1( struct brw_compile *p,
                        GLuint mask,
                        const struct brw_reg *arg0 )
 {
-   int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1;
+   int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 
    if (!(mask & BRW_WRITEMASK_XYZW))
       return; /* Do not emit dead code */
 
-   assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW));
+   assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 
    brw_MOV(p, brw_message_reg(2), arg0[0]);
 
@@ -732,12 +741,12 @@ static void emit_math2( struct brw_compile *p,
                        const struct brw_reg *arg0,
                        const struct brw_reg *arg1)
 {
-   int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1;
+   int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 
    if (!(mask & BRW_WRITEMASK_XYZW))
       return; /* Do not emit dead code */
 
-   assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW));
+   assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 
    brw_push_insn_state(p);
 
@@ -790,21 +799,33 @@ static void emit_tex( struct brw_wm_compile *c,
    GLuint i, nr;
    GLuint emit;
    GLuint msg_type;
+   GLboolean shadow = FALSE;
 
    /* How many input regs are there?
     */
-   switch (inst->tex_idx) {
-   case TEXTURE_1D_INDEX:
+   switch (inst->tex_target) {
+   case TGSI_TEXTURE_1D:
       emit = BRW_WRITEMASK_X;
       nr = 1;
       break;
-   case TEXTURE_2D_INDEX:
-   case TEXTURE_RECT_INDEX:
+   case TGSI_TEXTURE_SHADOW1D:
+      emit = BRW_WRITEMASK_XW;
+      nr = 4;
+      shadow = TRUE;
+      break;
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:   /* two coordinates, same as 2D */
       emit = BRW_WRITEMASK_XY;
       nr = 2;
       break;
-   case TEXTURE_3D_INDEX:
-   case TEXTURE_CUBE_INDEX:
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_SHADOWRECT:
+      emit = BRW_WRITEMASK_XYW;
+      nr = 4;
+      shadow = TRUE;
+      break;
+   case TGSI_TEXTURE_3D:
+   case TGSI_TEXTURE_CUBE:
       emit = BRW_WRITEMASK_XYZ;
       nr = 3;
       break;
@@ -813,11 +833,6 @@ static void emit_tex( struct brw_wm_compile *c,
       abort();
    }
 
-   if (inst->tex_shadow) {
-      nr = 4;
-      emit |= BRW_WRITEMASK_W;
-   }
-
    msgLength = 1;
 
    for (i = 0; i < nr; i++) {
@@ -832,12 +847,12 @@ static void emit_tex( struct brw_wm_compile *c,
    responseLength = 8;         /* always */
 
    if (BRW_IS_IGDNG(p->brw)) {
-       if (inst->tex_shadow)
+       if (shadow)
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
        else
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
    } else {
-       if (inst->tex_shadow)
+       if (shadow)
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
        else
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
@@ -870,20 +885,23 @@ static void emit_txb( struct brw_wm_compile *c,
    GLuint msg_type;
    /* Shadow ignored for txb.
     */
-   switch (inst->tex_idx) {
-   case TEXTURE_1D_INDEX:
+   switch (inst->tex_target) {
+   case TGSI_TEXTURE_1D:
+   case TGSI_TEXTURE_SHADOW1D:
       brw_MOV(p, brw_message_reg(2), arg[0]);
       brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
       break;
-   case TEXTURE_2D_INDEX:
-   case TEXTURE_RECT_INDEX:
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_SHADOWRECT:
       brw_MOV(p, brw_message_reg(2), arg[0]);
       brw_MOV(p, brw_message_reg(4), arg[1]);
       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
       break;
-   case TEXTURE_3D_INDEX:
-   case TEXTURE_CUBE_INDEX:
+   case TGSI_TEXTURE_3D:
+   case TGSI_TEXTURE_CUBE:
       brw_MOV(p, brw_message_reg(2), arg[0]);
       brw_MOV(p, brw_message_reg(4), arg[1]);
       brw_MOV(p, brw_message_reg(6), arg[2]);
@@ -976,10 +994,10 @@ static void emit_kil( struct brw_wm_compile *c,
    }
 }
 
-/* KIL_NV kills the pixels that are currently executing, not based on a test
+/* KILLP kills the pixels that are currently executing, not based on a test
  * of the arguments.
  */
-static void emit_kil_nv( struct brw_wm_compile *c )
+static void emit_killp( struct brw_wm_compile *c )
 {
    struct brw_compile *p = &c->func;
    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
@@ -1259,7 +1277,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
     */
    spill_values(c, c->payload.depth, 4);
    spill_values(c, c->creg, c->nr_creg);
-   spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
+   spill_values(c, c->payload.input_interp, PIPE_MAX_SHADER_INPUTS);
    
 
    for (insn = 0; insn < c->nr_insns; insn++) {
@@ -1328,89 +1346,89 @@ void brw_wm_emit( struct brw_wm_compile *c )
 
         /* Straightforward arithmetic:
          */
-      case OPCODE_ADD:
+      case TGSI_OPCODE_ADD:
         emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
         break;
 
-      case OPCODE_FRC:
+      case TGSI_OPCODE_FRC:
         emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
         break;
 
-      case OPCODE_FLR:
+      case TGSI_OPCODE_FLR:
         emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
         break;
 
-      case OPCODE_DDX:
+      case TGSI_OPCODE_DDX:
         emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
         break;
 
-      case OPCODE_DDY:
+      case TGSI_OPCODE_DDY:
         emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
         break;
 
-      case OPCODE_DP3:
+      case TGSI_OPCODE_DP3:
         emit_dp3(p, dst, dst_flags, args[0], args[1]);
         break;
 
-      case OPCODE_DP4:
+      case TGSI_OPCODE_DP4:
         emit_dp4(p, dst, dst_flags, args[0], args[1]);
         break;
 
-      case OPCODE_DPH:
+      case TGSI_OPCODE_DPH:
         emit_dph(p, dst, dst_flags, args[0], args[1]);
         break;
 
-      case OPCODE_TRUNC:
+      case TGSI_OPCODE_TRUNC:
         emit_trunc(p, dst, dst_flags, args[0]);
         break;
 
-      case OPCODE_LRP:
+      case TGSI_OPCODE_LRP:
         emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
         break;
 
-      case OPCODE_MAD: 
+      case TGSI_OPCODE_MAD:    
         emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
         break;
 
-      case OPCODE_MOV:
+      case TGSI_OPCODE_MOV:
         emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
         break;
 
-      case OPCODE_MUL:
+      case TGSI_OPCODE_MUL:
         emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
         break;
 
-      case OPCODE_XPD:
+      case TGSI_OPCODE_XPD:
         emit_xpd(p, dst, dst_flags, args[0], args[1]);
         break;
 
         /* Higher math functions:
          */
-      case OPCODE_RCP:
+      case TGSI_OPCODE_RCP:
         emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
         break;
 
-      case OPCODE_RSQ:
+      case TGSI_OPCODE_RSQ:
         emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
         break;
 
-      case OPCODE_SIN:
+      case TGSI_OPCODE_SIN:
         emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
         break;
 
-      case OPCODE_COS:
+      case TGSI_OPCODE_COS:
         emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
         break;
 
-      case OPCODE_EX2:
+      case TGSI_OPCODE_EX2:
         emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
         break;
 
-      case OPCODE_LG2:
+      case TGSI_OPCODE_LG2:
         emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
         break;
 
-      case OPCODE_SCS:
+      case TGSI_OPCODE_SCS:
         /* There is an scs math function, but it would need some
          * fixup for 16-element execution.
          */
@@ -1420,71 +1438,70 @@ void brw_wm_emit( struct brw_wm_compile *c )
            emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]);
         break;
 
-      case OPCODE_POW:
+      case TGSI_OPCODE_POW:
         emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
         break;
 
         /* Comparisons:
          */
-      case OPCODE_CMP:
+      case TGSI_OPCODE_CMP:
         emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
         break;
 
-      case OPCODE_MAX:
+      case TGSI_OPCODE_MAX:
         emit_max(p, dst, dst_flags, args[0], args[1]);
         break;
 
-      case OPCODE_MIN:
+      case TGSI_OPCODE_MIN:
         emit_min(p, dst, dst_flags, args[0], args[1]);
         break;
 
-      case OPCODE_SLT:
+      case TGSI_OPCODE_SLT:
         emit_slt(p, dst, dst_flags, args[0], args[1]);
         break;
 
-      case OPCODE_SLE:
+      case TGSI_OPCODE_SLE:
         emit_sle(p, dst, dst_flags, args[0], args[1]);
        break;
-      case OPCODE_SGT:
+      case TGSI_OPCODE_SGT:
         emit_sgt(p, dst, dst_flags, args[0], args[1]);
        break;
-      case OPCODE_SGE:
+      case TGSI_OPCODE_SGE:
         emit_sge(p, dst, dst_flags, args[0], args[1]);
         break;
-      case OPCODE_SEQ:
+      case TGSI_OPCODE_SEQ:
         emit_seq(p, dst, dst_flags, args[0], args[1]);
        break;
-      case OPCODE_SNE:
+      case TGSI_OPCODE_SNE:
         emit_sne(p, dst, dst_flags, args[0], args[1]);
        break;
 
-      case OPCODE_LIT:
+      case TGSI_OPCODE_LIT:
         emit_lit(p, dst, dst_flags, args[0]);
         break;
 
         /* Texturing operations:
          */
-      case OPCODE_TEX:
+      case TGSI_OPCODE_TEX:
         emit_tex(c, inst, dst, dst_flags, args[0]);
         break;
 
-      case OPCODE_TXB:
+      case TGSI_OPCODE_TXB:
         emit_txb(c, inst, dst, dst_flags, args[0]);
         break;
 
-      case OPCODE_KIL:
+      case TGSI_OPCODE_KIL:
         emit_kil(c, args[0]);
         break;
 
-      case OPCODE_KIL_NV:
-        emit_kil_nv(c);
+      case TGSI_OPCODE_KILP:
+        emit_killp(c);
         break;
 
       default:
         debug_printf("Unsupported opcode %i (%s) in fragment shader\n",
-                     inst->opcode, inst->opcode < MAX_OPCODE ?
-                                   _mesa_opcode_string(inst->opcode) :
-                                   "unknown");
+                     inst->opcode, 
+                     tgsi_get_opcode_info(inst->opcode)->mnemonic);
       }
       
       for (i = 0; i < 4; i++)
index d59473073015ee17cb84d54e0845666953ea3b72..8ba037cdae7cfeb2d7142d18e2ab190394a1be52 100644 (file)
@@ -30,9 +30,8 @@
   */
                
 
-#include "pipe/p_shader_constants.h"
+#include "pipe/p_shader_tokens.h"
 
-#include "brw_context.h"
 #include "brw_wm.h"
 #include "brw_util.h"
 
@@ -43,7 +42,7 @@
 #define W    3
 
 
-static const char *wm_opcode_strings[] = {   
+static const char *wm_opcode_strings[] = {
    "PIXELXY",
    "DELTAXY",
    "PIXELW",
@@ -57,143 +56,6 @@ static const char *wm_opcode_strings[] = {
 
 
 
-/***********************************************************************
- * Source regs
- */
-
-static struct prog_src_register src_reg(GLuint file, GLuint idx)
-{
-   struct prog_src_register reg;
-   reg.File = file;
-   reg.Index = idx;
-   reg.Swizzle = SWIZZLE_NOOP;
-   reg.RelAddr = 0;
-   reg.Negate = NEGATE_NONE;
-   reg.Abs = 0;
-   return reg;
-}
-
-static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
-{
-   return src_reg(dst.File, dst.Index);
-}
-
-static struct prog_src_register src_undef( void )
-{
-   return src_reg(PROGRAM_UNDEFINED, 0);
-}
-
-static GLboolean src_is_undef(struct prog_src_register src)
-{
-   return src.File == PROGRAM_UNDEFINED;
-}
-
-static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
-{
-   reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
-   return reg;
-}
-
-static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
-{
-   return src_swizzle(reg, x, x, x, x);
-}
-
-static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
-{
-   reg.Swizzle = swizzle;
-   return reg;
-}
-
-
-/***********************************************************************
- * Dest regs
- */
-
-static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
-{
-   struct prog_dst_register reg;
-   reg.File = file;
-   reg.Index = idx;
-   reg.WriteMask = BRW_WRITEMASK_XYZW;
-   reg.RelAddr = 0;
-   reg.CondMask = COND_TR;
-   reg.CondSwizzle = 0;
-   reg.CondSrc = 0;
-   reg.pad = 0;
-   return reg;
-}
-
-static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
-{
-   reg.WriteMask &= mask;
-   return reg;
-}
-
-static struct prog_dst_register dst_undef( void )
-{
-   return dst_reg(PROGRAM_UNDEFINED, 0);
-}
-
-
-
-static struct prog_dst_register get_temp( struct brw_wm_compile *c )
-{
-   int bit = _mesa_ffs( ~c->fp_temp );
-
-   if (!bit) {
-      debug_printf("%s: out of temporaries\n", __FILE__);
-      exit(1);
-   }
-
-   c->fp_temp |= 1<<(bit-1);
-   return dst_reg(PROGRAM_TEMPORARY, c->first_internal_temp+(bit-1));
-}
-
-
-static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
-{
-   c->fp_temp &= ~(1 << (temp.Index - c->first_internal_temp));
-}
-
-
-/***********************************************************************
- * Instructions 
- */
-
-static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
-{
-   return &c->prog_instructions[c->nr_fp_insns++];
-}
-
-static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
-                                       const struct prog_instruction *inst0)
-{
-   struct prog_instruction *inst = get_fp_inst(c);
-   *inst = *inst0;
-   return inst;
-}
-
-static struct prog_instruction * emit_op(struct brw_wm_compile *c,
-                                        GLuint op,
-                                        struct prog_dst_register dest,
-                                        GLuint saturate,
-                                        struct prog_src_register src0,
-                                        struct prog_src_register src1,
-                                        struct prog_src_register src2 )
-{
-   struct prog_instruction *inst = get_fp_inst(c);
-      
-   memset(inst, 0, sizeof(*inst));
-
-   inst->Opcode = op;
-   inst->DstReg = dest;
-   inst->SaturateMode = saturate;   
-   inst->SrcReg[0] = src0;
-   inst->SrcReg[1] = src1;
-   inst->SrcReg[2] = src2;
-   return inst;
-}
 
 
 /* Many opcodes produce the same value across all the result channels.
@@ -202,32 +64,28 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c,
  * anyway.  We can easily get both by emitting the opcode to one channel, and
  * then MOVing it to the others, which brw_wm_pass*.c already understands.
  */
-static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
-                                                const struct prog_instruction *inst0)
-{
-   struct prog_instruction *inst;
-   unsigned int dst_chan;
-   unsigned int other_channel_mask;
-
-   if (inst0->DstReg.WriteMask == 0)
-      return NULL;
-
-   dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1;
-   inst = get_fp_inst(c);
-   *inst = *inst0;
-   inst->DstReg.WriteMask = 1 << dst_chan;
-
-   other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
-   if (other_channel_mask != 0) {
-      inst = emit_op(c,
-                    TGSI_OPCODE_MOV,
-                    dst_mask(inst0->DstReg, other_channel_mask),
-                    0,
-                    src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
-                    src_undef(),
-                    src_undef());
+static void emit_scalar_insn(struct brw_wm_compile *c,
+                            unsigned opcode,
+                            struct brw_dst dst,
+                            struct brw_src src0,
+                            struct brw_src src1,
+                            struct brw_src src2 )
+{
+   unsigned first_chan, first_mask;
+
+   /* Bail out first: ffs(0) is 0, so first_chan would wrap and the shift be undefined. */
+   if (dst.writemask == 0)
+      return;
+
+   first_chan = ffs(dst.writemask) - 1;
+   first_mask = 1 << first_chan;
+
+   /* Compute the value once, into the lowest enabled channel... */
+   emit_op( c, opcode, brw_writemask(dst, first_mask), src0, src1, src2 );
+   if (dst.writemask != first_mask) {   /* ...then replicate it to the rest. */
+      emit_op1(c, TGSI_OPCODE_MOV, brw_writemask(dst, ~first_mask),
+              src_swizzle1(brw_src(dst), first_chan));
    }
-   return inst;
 }
 
 
@@ -235,11 +93,11 @@ static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
  * Special instructions for interpolation and other tasks
  */
 
-static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
+static struct ureg_src get_pixel_xy( struct brw_wm_compile *c )
 {
    if (src_is_undef(c->pixel_xy)) {
-      struct prog_dst_register pixel_xy = get_temp(c);
-      struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
+      struct ureg_dst pixel_xy = get_temp(c);
+      struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH);
       
       
       /* Emit the out calculations, and hold onto the results.  Use
@@ -250,7 +108,6 @@ static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
       emit_op(c,
              WM_PIXELXY,
              dst_mask(pixel_xy, BRW_WRITEMASK_XY),
-             0,
              payload_r0_depth,
              src_undef(),
              src_undef());
@@ -261,19 +118,18 @@ static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
    return c->pixel_xy;
 }
 
-static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
+static struct ureg_src get_delta_xy( struct brw_wm_compile *c )
 {
    if (src_is_undef(c->delta_xy)) {
-      struct prog_dst_register delta_xy = get_temp(c);
-      struct prog_src_register pixel_xy = get_pixel_xy(c);
-      struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
+      struct ureg_dst delta_xy = get_temp(c);
+      struct ureg_src pixel_xy = get_pixel_xy(c);
+      struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH);
       
       /* deltas.xy = DELTAXY pixel_xy, payload[0]
        */
       emit_op(c,
              WM_DELTAXY,
              dst_mask(delta_xy, BRW_WRITEMASK_XY),
-             0,
              pixel_xy, 
              payload_r0_depth,
              src_undef());
@@ -284,19 +140,18 @@ static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
    return c->delta_xy;
 }
 
-static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
+static struct ureg_src get_pixel_w( struct brw_wm_compile *c )
 {
    if (src_is_undef(c->pixel_w)) {
-      struct prog_dst_register pixel_w = get_temp(c);
-      struct prog_src_register deltas = get_delta_xy(c);
-      struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
+      struct ureg_dst pixel_w = get_temp(c);
+      struct ureg_src deltas = get_delta_xy(c);
+      struct ureg_src interp_wpos = src_reg(TGSI_FILE_PAYLOAD, FRAG_ATTRIB_WPOS);
 
       /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
        */
       emit_op(c,
              WM_PIXELW,
              dst_mask(pixel_w, BRW_WRITEMASK_W),
-             0,
              interp_wpos,
              deltas, 
              src_undef());
@@ -313,9 +168,9 @@ static void emit_interp( struct brw_wm_compile *c,
                         GLuint semantic_index,
                         GLuint interp_mode )
 {
-   struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
-   struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
-   struct prog_src_register deltas = get_delta_xy(c);
+   struct ureg_dst dst = dst_reg(TGSI_FILE_INPUT, idx);
+   struct ureg_src interp = src_reg(TGSI_FILE_PAYLOAD, idx);
+   struct ureg_src deltas = get_delta_xy(c);
 
    /* Need to use PINTERP on attributes which have been
     * multiplied by 1/W in the SF program, and LINTERP on those
@@ -325,271 +180,197 @@ static void emit_interp( struct brw_wm_compile *c,
    case FRAG_ATTRIB_WPOS:
       /* Have to treat wpos.xy specially:
        */
-      emit_op(c,
+      emit_op1(c,
              WM_WPOSXY,
              dst_mask(dst, BRW_WRITEMASK_XY),
-             0,
-             get_pixel_xy(c),
-             src_undef(),
-             src_undef());
+             get_pixel_xy(c));
       
-      dst = dst_mask(dst, BRW_WRITEMASK_ZW);
-
-      /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
+      /* TGSI_FILE_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
        */
-      emit_op(c,
-             WM_LINTERP,
-             dst,
-             0,
-             interp,
-             deltas,
-             src_undef());
+      emit_op2(c,
+              WM_LINTERP,
+              dst_mask(dst, BRW_WRITEMASK_ZW),
+              interp,
+              deltas);
       break;
 
    case TGSI_SEMANTIC_COLOR:
       if (c->key.flat_shade) {
-        emit_op(c,
+        emit_op1(c,
                 WM_CINTERP,
                 dst,
-                0,
-                interp,
-                src_undef(),
-                src_undef());
+                interp);
+      }
+      else if (interp_mode == TGSI_INTERPOLATE_LINEAR) {
+        emit_op2(c,
+                 WM_LINTERP,
+                 dst,
+                 interp,
+                 deltas);
       }
       else {
-        emit_op(c,
-                translate_interp_mode(interp_mode),
-                dst,
-                0,
-                interp,
-                deltas,
-                src_undef());
+        emit_op3(c,
+                 WM_PINTERP,
+                 dst,
+                 interp,
+                 deltas,
+                 get_pixel_w(c));
       }
+
       break;
    case FRAG_ATTRIB_FOGC:
       /* Interpolate the fog coordinate */
-      emit_op(c,
+      emit_op3(c,
              WM_PINTERP,
              dst_mask(dst, BRW_WRITEMASK_X),
-             0,
              interp,
              deltas,
              get_pixel_w(c));
 
-      emit_op(c,
+      emit_op1(c,
              TGSI_OPCODE_MOV,
-             dst_mask(dst, BRW_WRITEMASK_YZW),
-             0,
-             src_swizzle(interp,
-                         SWIZZLE_ZERO,
-                         SWIZZLE_ZERO,
-                         SWIZZLE_ZERO,
-                         SWIZZLE_ONE),
-             src_undef(),
-             src_undef());
+             dst_mask(dst, BRW_WRITEMASK_YZ),
+             brw_imm1f(0.0));
+
+      emit_op1(c,
+             TGSI_OPCODE_MOV,
+             dst_mask(dst, BRW_WRITEMASK_W),
+             brw_imm1f(1.0));
       break;
 
    case FRAG_ATTRIB_FACE:
       /* XXX review/test this case */
-      emit_op(c,
-              WM_FRONTFACING,
-              dst_mask(dst, BRW_WRITEMASK_X),
-              0,
-              src_undef(),
-              src_undef(),
-              src_undef());
+      emit_op0(c,
+              WM_FRONTFACING,
+              dst_mask(dst, BRW_WRITEMASK_X));
+      
+      emit_op1(c,
+             TGSI_OPCODE_MOV,
+             dst_mask(dst, BRW_WRITEMASK_YZ),
+             brw_imm1f(0.0));
+
+      emit_op1(c,
+             TGSI_OPCODE_MOV,
+             dst_mask(dst, BRW_WRITEMASK_W),
+             brw_imm1f(1.0));
       break;
 
    case FRAG_ATTRIB_PNTC:
       /* XXX review/test this case */
-      emit_op(c,
-             WM_PINTERP,
-             dst_mask(dst, BRW_WRITEMASK_XY),
-             0,
-             interp,
-             deltas,
-             get_pixel_w(c));
-
-      emit_op(c,
+      emit_op3(c,
+              WM_PINTERP,
+              dst_mask(dst, BRW_WRITEMASK_XY),
+              interp,
+              deltas,
+              get_pixel_w(c));
+
+      emit_op1(c,
              TGSI_OPCODE_MOV,
-             dst_mask(dst, BRW_WRITEMASK_ZW),
-             0,
-             src_swizzle(interp,
-                         SWIZZLE_ZERO,
-                         SWIZZLE_ZERO,
-                         SWIZZLE_ZERO,
-                         SWIZZLE_ONE),
-             src_undef(),
-             src_undef());
-      break;
+             dst_mask(dst, BRW_WRITEMASK_Z),
+             brw_imm1f(c->pass_fp, 0.0f));
 
-   default:
-      emit_op(c,
-             translate_interp_mode(interp_mode),
-             dst,
-             0,
-             interp,
-             deltas,
-             get_pixel_w(c));
+      emit_op1(c,
+             TGSI_OPCODE_MOV,
+             dst_mask(dst, BRW_WRITEMASK_W),
+             brw_imm1f(c->pass_fp, 1.0f));
       break;
-   }
-}
-
-/***********************************************************************
- * Hacks to extend the program parameter and constant lists.
- */
-
-/* Add the fog parameters to the parameter list of the original
- * program, rather than creating a new list.  Doesn't really do any
- * harm and it's not as if the parameter handling isn't a big hack
- * anyway.
- */
-static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c, 
-                                                     GLint s0,
-                                                     GLint s1,
-                                                     GLint s2,
-                                                     GLint s3,
-                                                     GLint s4)
-{
-   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
-   gl_state_index tokens[STATE_LENGTH];
-   GLuint idx;
-   tokens[0] = s0;
-   tokens[1] = s1;
-   tokens[2] = s2;
-   tokens[3] = s3;
-   tokens[4] = s4;
-   
-   for (idx = 0; idx < paramList->NumParameters; idx++) {
-      if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
-         memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
-        return src_reg(PROGRAM_STATE_VAR, idx);
-   }
-
-   idx = _mesa_add_state_reference( paramList, tokens );
-
-   return src_reg(PROGRAM_STATE_VAR, idx);
-}
 
+   default: 
+      switch (interp_mode) {
+      case TGSI_INTERPOLATE_CONSTANT:
+        emit_op1(c,
+                 WM_CINTERP,
+                 dst,
+                 interp);
+        break;
 
-static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, 
-                                                    GLfloat s0,
-                                                    GLfloat s1,
-                                                    GLfloat s2,
-                                                    GLfloat s3)
-{
-   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
-   GLfloat values[4];
-   GLuint idx;
-   GLuint swizzle;
-
-   values[0] = s0;
-   values[1] = s1;
-   values[2] = s2;
-   values[3] = s3;
-
-   /* Have to search, otherwise multiple compilations will each grow
-    * the parameter list.
-    */
-   for (idx = 0; idx < paramList->NumParameters; idx++) {
-      if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
-         memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
+      case TGSI_INTERPOLATE_LINEAR:
+        emit_op2(c,
+                 WM_LINTERP,
+                 dst,
+                 interp,
+                 deltas);
+        break;
 
-        /* XXX: this mimics the mesa bug which puts all constants and
-         * parameters into the "PROGRAM_STATE_VAR" category:
-         */
-        return src_reg(PROGRAM_STATE_VAR, idx);
+      case TGSI_INTERPOLATE_PERSPECTIVE:
+        emit_op3(c,
+                 WM_PINTERP,
+                 dst,
+                 interp,
+                 deltas,
+                 get_pixel_w(c));
+        break;
+      }
+      break;
    }
-   
-   idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
-   assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
-   return src_reg(PROGRAM_STATE_VAR, idx);
 }
 
 
-
 /***********************************************************************
  * Expand various instructions here to simpler forms.  
  */
 static void precalc_dst( struct brw_wm_compile *c,
-                              const struct prog_instruction *inst )
+                        struct brw_dst dst,
+                        struct brw_src src0,
+                        struct brw_src src1 )
 {
-   struct prog_src_register src0 = inst->SrcReg[0];
-   struct prog_src_register src1 = inst->SrcReg[1];
-   struct prog_dst_register dst = inst->DstReg;
-   
    if (dst.WriteMask & BRW_WRITEMASK_Y) {      
       /* dst.y = mul src0.y, src1.y
        */
-      emit_op(c,
-             TGSI_OPCODE_MUL,
-             dst_mask(dst, BRW_WRITEMASK_Y),
-             inst->SaturateMode,
-             src0,
-             src1,
-             src_undef());
+      emit_op2(c,
+              TGSI_OPCODE_MUL,
+              dst_mask(dst, BRW_WRITEMASK_Y),
+              src0,
+              src1);
    }
 
    if (dst.WriteMask & BRW_WRITEMASK_XZ) {
       struct prog_instruction *swz;
       GLuint z = GET_SWZ(src0.Swizzle, Z);
 
-      /* dst.xz = swz src0.1zzz
+      /* dst.z = mov src0.zzzz
+       */
+      emit_op1(c,
+             TGSI_OPCODE_MOV,
+             dst_mask(dst, BRW_WRITEMASK_Z),
+             src_swizzle1(src0, Z));
+
+      /* dst.x = immf(1.0)
        */
-      swz = emit_op(c,
-                   TGSI_OPCODE_MOV,
-                   dst_mask(dst, BRW_WRITEMASK_XZ),
-                   inst->SaturateMode,
-                   src_swizzle(src0, SWIZZLE_ONE, z, z, z),
-                   src_undef(),
-                   src_undef());
-      /* Avoid letting negation flag of src0 affect our 1 constant. */
-      swz->SrcReg[0].Negate &= ~NEGATE_X;
+      emit_op1(c,
+             TGSI_OPCODE_MOV,
+             brw_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0),
+             src_immf(c, 1.0));
    }
    if (dst.WriteMask & BRW_WRITEMASK_W) {
       /* dst.w = mov src1.w
        */
-      emit_op(c,
-             TGSI_OPCODE_MOV,
-             dst_mask(dst, BRW_WRITEMASK_W),
-             inst->SaturateMode,
-             src1,
-             src_undef(),
-             src_undef());
+      emit_op1(c,
+              TGSI_OPCODE_MOV,
+              dst_mask(dst, BRW_WRITEMASK_W),
+              src1);
    }
 }
 
 
 static void precalc_lit( struct brw_wm_compile *c,
-                        const struct prog_instruction *inst )
+                        struct ureg_dst dst,
+                        struct ureg_src src0 )
 {
-   struct prog_src_register src0 = inst->SrcReg[0];
-   struct prog_dst_register dst = inst->DstReg;
-   
    if (dst.WriteMask & BRW_WRITEMASK_XW) {
-      struct prog_instruction *swz;
-
-      /* dst.xw = swz src0.1111
+      /* dst.xw = imm(1.0f)
        */
-      swz = emit_op(c,
-                   TGSI_OPCODE_MOV,
-                   dst_mask(dst, BRW_WRITEMASK_XW),
-                   0,
-                   src_swizzle1(src0, SWIZZLE_ONE),
-                   src_undef(),
-                   src_undef());
-      /* Avoid letting the negation flag of src0 affect our 1 constant. */
-      swz->SrcReg[0].Negate = NEGATE_NONE;
+      emit_op1(c,
+              TGSI_OPCODE_MOV,
+              brw_saturate(brw_writemask(dst, BRW_WRITEMASK_XW), 0),
+              brw_imm1f(1.0f));
    }
 
    if (dst.WriteMask & BRW_WRITEMASK_YZ) {
-      emit_op(c,
-             TGSI_OPCODE_LIT,
-             dst_mask(dst, BRW_WRITEMASK_YZ),
-             inst->SaturateMode,
-             src0,
-             src_undef(),
-             src_undef());
+      emit_op1(c,
+              TGSI_OPCODE_LIT,
+              brw_writemask(dst, BRW_WRITEMASK_YZ),
+              src0);
    }
 }
 
@@ -601,99 +382,62 @@ static void precalc_lit( struct brw_wm_compile *c,
  * instruction itself.
  */
 static void precalc_tex( struct brw_wm_compile *c,
-                        const struct prog_instruction *inst )
+                        struct brw_dst dst,
+                        unsigned unit,
+                        struct brw_src src0 )
 {
-   struct prog_src_register coord;
-   struct prog_dst_register tmpcoord;
-   const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
+   struct ureg_src coord = src_undef();
+   struct ureg_dst tmp = dst_undef();
 
    assert(unit < BRW_MAX_TEX_UNIT);
 
+   /* Cubemap: find longest component of coord vector and normalize
+    * it.
+    */
    if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
-       struct prog_instruction *out;
-       struct prog_dst_register tmp0 = get_temp(c);
-       struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
-       struct prog_dst_register tmp1 = get_temp(c);
-       struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
-       struct prog_src_register src0 = inst->SrcReg[0];
-
-       /* find longest component of coord vector and normalize it */
-       tmpcoord = get_temp(c);
-       coord = src_reg_from_dst(tmpcoord);
-
-       /* tmpcoord = src0 (i.e.: coord = src0) */
-       out = emit_op(c, TGSI_OPCODE_MOV,
-                     tmpcoord,
-                     0,
-                     src0,
-                     src_undef(),
-                     src_undef());
-       out->SrcReg[0].Negate = NEGATE_NONE;
-       out->SrcReg[0].Abs = 1;
-
-       /* tmp0 = MAX(coord.X, coord.Y) */
-       emit_op(c, TGSI_OPCODE_MAX,
-               tmp0,
-               0,
-               src_swizzle1(coord, X),
-               src_swizzle1(coord, Y),
-               src_undef());
-
-       /* tmp1 = MAX(tmp0, coord.Z) */
-       emit_op(c, TGSI_OPCODE_MAX,
-               tmp1,
-               0,
-               tmp0src,
-               src_swizzle1(coord, Z),
-               src_undef());
-
-       /* tmp0 = 1 / tmp1 */
-       emit_op(c, TGSI_OPCODE_RCP,
-               dst_mask(tmp0, BRW_WRITEMASK_X),
-               0,
-               tmp1src,
-               src_undef(),
-               src_undef());
-
-       /* tmpCoord = src0 * tmp0 */
-       emit_op(c, TGSI_OPCODE_MUL,
-               tmpcoord,
-               0,
-               src0,
-               src_swizzle1(tmp0src, SWIZZLE_X),
-               src_undef());
-
-       release_temp(c, tmp0);
-       release_temp(c, tmp1);
+      struct ureg_src tmpsrc;
+
+      tmp = get_temp(c);
+      tmpsrc = brw_src(tmp);   /* read-side view of the scratch register */
+
+      /* tmp = abs(src0) */
+      emit_op1(c,
+              TGSI_OPCODE_MOV,
+              tmp,
+              brw_abs(src0));
+
+      /* tmp.X = MAX(tmp.X, tmp.Y) */
+      emit_op2(c, TGSI_OPCODE_MAX,
+              brw_writemask(tmp, BRW_WRITEMASK_X),
+              src_swizzle1(tmpsrc, X),
+              src_swizzle1(tmpsrc, Y));
+
+      /* tmp.X = MAX(tmp.X, tmp.Z) */
+      emit_op2(c, TGSI_OPCODE_MAX,
+              brw_writemask(tmp, BRW_WRITEMASK_X),
+              tmpsrc,
+              src_swizzle1(tmpsrc, Z));
+
+      /* tmp.X = 1 / tmp.X */
+      emit_op1(c, TGSI_OPCODE_RCP,
+             dst_mask(tmp, BRW_WRITEMASK_X),
+             tmpsrc);
+
+      /* tmp = src0 * tmp.xxxx */
+      emit_op2(c, TGSI_OPCODE_MUL,
+              tmp,
+              src0,
+              src_swizzle1(tmpsrc, SWIZZLE_X));
+
+      coord = tmpsrc;
    }
    else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
-      struct prog_src_register scale = 
-        search_or_add_param5( c, 
-                              STATE_INTERNAL, 
-                              STATE_TEXRECT_SCALE,
-                              unit,
-                              0,0 );
-
-      tmpcoord = get_temp(c);
-
-      /* coord.xy   = MUL inst->SrcReg[0], { 1/width, 1/height }
+      /* XXX: need a mechanism for internally generated constants.
        */
-      emit_op(c,
-             TGSI_OPCODE_MUL,
-             tmpcoord,
-             0,
-             inst->SrcReg[0],
-             src_swizzle(scale,
-                         SWIZZLE_X,
-                         SWIZZLE_Y,
-                         SWIZZLE_ONE,
-                         SWIZZLE_ONE),
-             src_undef());
-
-      coord = src_reg_from_dst(tmpcoord);
+      coord = src0;
    }
    else {
-      coord = inst->SrcReg[0];
+      coord = src0;
    }
 
    /* Need to emit YUV texture conversions by hand.  Probably need to
@@ -704,58 +448,36 @@ static void precalc_tex( struct brw_wm_compile *c,
    if (c->key.yuvtex_mask & (1 << unit)) {
       /* convert ycbcr to RGBA */
       GLboolean  swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
-
-      /* 
-        CONST C0 = { -.5, -.0625,  -.5, 1.164 }
-        CONST C1 = { 1.596, -0.813, 2.018, -.391 }
-        UYV     = TEX ...
-        UYV.xyz = ADD UYV,     C0
-        UYV.y   = MUL UYV.y,   C0.w
-        if (UV swaped)
-           RGB.xyz = MAD UYV.zzx, C1,   UYV.y
-        else
-           RGB.xyz = MAD UYV.xxz, C1,   UYV.y 
-        RGB.y   = MAD UYV.z,   C1.w, RGB.y
-      */
-      struct prog_dst_register dst = inst->DstReg;
-      struct prog_dst_register tmp = get_temp(c);
-      struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
-      struct prog_src_register C0 = search_or_add_const4f( c,  -.5, -.0625, -.5, 1.164 );
-      struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
+      struct ureg_dst dst = inst->DstReg;
+      struct ureg_dst tmp = get_temp(c);
+      struct ureg_src tmpsrc = src_reg_from_dst(tmp);
+      struct ureg_src C0 = ureg_imm4f( c->ureg,  -.5, -.0625, -.5, 1.164 );
+      struct ureg_src C1 = ureg_imm4f( c->ureg, 1.596, -0.813, 2.018, -.391 );
      
       /* tmp     = TEX ...
        */
       emit_tex_op(c, 
                   TGSI_OPCODE_TEX,
-                  tmp,
-                  inst->SaturateMode,
+                  brw_saturate(tmp, dst.Saturate),
                   unit,
                   inst->TexSrcTarget,
-                  inst->TexShadow,
                   coord,
                   src_undef(),
                   src_undef());
 
       /* tmp.xyz =  ADD TMP, C0
        */
-      emit_op(c,
-             TGSI_OPCODE_ADD,
-             dst_mask(tmp, BRW_WRITEMASK_XYZ),
-             0,
-             tmpsrc,
-             C0,
-             src_undef());
+      emit_op2(c, TGSI_OPCODE_ADD,
+              dst_mask(tmp, BRW_WRITEMASK_XYZ),
+              tmpsrc,
+              C0);
 
       /* YUV.y   = MUL YUV.y, C0.w
        */
-
-      emit_op(c,
-             TGSI_OPCODE_MUL,
-             dst_mask(tmp, BRW_WRITEMASK_Y),
-             0,
-             tmpsrc,
-             src_swizzle1(C0, W),
-             src_undef());
+      emit_op2(c, TGSI_OPCODE_MUL,
+              dst_mask(tmp, BRW_WRITEMASK_Y),
+              tmpsrc,
+              src_swizzle1(C0, W));
 
       /* 
        * if (UV swaped)
@@ -764,23 +486,22 @@ static void precalc_tex( struct brw_wm_compile *c,
        *     RGB.xyz = MAD YUV.xxz, C1, YUV.y
        */
 
-      emit_op(c,
-             TGSI_OPCODE_MAD,
-             dst_mask(dst, BRW_WRITEMASK_XYZ),
-             0,
-             swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
-             C1,
-             src_swizzle1(tmpsrc, Y));
+      emit_op3(c, TGSI_OPCODE_MAD,
+              dst_mask(dst, BRW_WRITEMASK_XYZ),
+              ( swap_uv ? 
+                src_swizzle(tmpsrc, Z,Z,X,X) : 
+                src_swizzle(tmpsrc, X,X,Z,Z)),
+              C1,
+              src_swizzle1(tmpsrc, Y));
 
       /*  RGB.y   = MAD YUV.z, C1.w, RGB.y
        */
-      emit_op(c,
-             TGSI_OPCODE_MAD,
-             dst_mask(dst, BRW_WRITEMASK_Y),
-             0,
-             src_swizzle1(tmpsrc, Z),
-             src_swizzle1(C1, W),
-             src_swizzle1(src_reg_from_dst(dst), Y));
+      emit_op3(c,
+              TGSI_OPCODE_MAD,
+              dst_mask(dst, BRW_WRITEMASK_Y),
+              src_swizzle1(tmpsrc, Z),
+              src_swizzle1(C1, W),
+              src_swizzle1(src_reg_from_dst(dst), Y));
 
       release_temp(c, tmp);
    }
@@ -789,29 +510,20 @@ static void precalc_tex( struct brw_wm_compile *c,
       emit_tex_op(c, 
                   TGSI_OPCODE_TEX,
                   inst->DstReg,
-                  inst->SaturateMode,
                   unit,
                   inst->TexSrcTarget,
-                  inst->TexShadow,
                   coord,
                   src_undef(),
                   src_undef());
    }
 
-   /* For GL_EXT_texture_swizzle: */
-   if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
-      /* swizzle the result of the TEX instruction */
-      struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
-      emit_op(c, TGSI_OPCODE_MOV,
-              inst->DstReg,
-              SATURATE_OFF, /* saturate already done above */
-              src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
-              src_undef(),
-              src_undef());
-   }
+   /* XXX: add GL_EXT_texture_swizzle support to gallium -- by
+    * generating shader variants in mesa state tracker.
+    */
 
-   if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
-       (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
+   /* Release this temp if we ended up allocating it:
+    */
+   if (!brw_dst_is_undef(tmpcoord))
       release_temp(c, tmpcoord);
 }
 
@@ -822,7 +534,7 @@ static void precalc_tex( struct brw_wm_compile *c,
 static GLboolean projtex( struct brw_wm_compile *c,
                          const struct prog_instruction *inst )
 {
-   const struct prog_src_register src = inst->SrcReg[0];
+   const struct ureg_src src = inst->SrcReg[0];
    GLboolean retVal;
 
    assert(inst->Opcode == TGSI_OPCODE_TXP);
@@ -836,7 +548,7 @@ static GLboolean projtex( struct brw_wm_compile *c,
     */
    if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
       retVal = GL_FALSE;  /* ut2004 gun rendering !?! */
-   else if (src.File == PROGRAM_INPUT && 
+   else if (src.File == TGSI_FILE_INPUT && 
            GET_SWZ(src.Swizzle, W) == W &&
             (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
       retVal = GL_FALSE;
@@ -853,10 +565,10 @@ static GLboolean projtex( struct brw_wm_compile *c,
 static void precalc_txp( struct brw_wm_compile *c,
                               const struct prog_instruction *inst )
 {
-   struct prog_src_register src0 = inst->SrcReg[0];
+   struct ureg_src src0 = inst->SrcReg[0];
 
    if (projtex(c, inst)) {
-      struct prog_dst_register tmp = get_temp(c);
+      struct ureg_dst tmp = get_temp(c);
       struct prog_instruction tmp_inst;
 
       /* tmp0.w = RCP inst.arg[0][3]
@@ -864,7 +576,6 @@ static void precalc_txp( struct brw_wm_compile *c,
       emit_op(c,
              TGSI_OPCODE_RCP,
              dst_mask(tmp, BRW_WRITEMASK_W),
-             0,
              src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
              src_undef(),
              src_undef());
@@ -874,7 +585,6 @@ static void precalc_txp( struct brw_wm_compile *c,
       emit_op(c,
              TGSI_OPCODE_MUL,
              dst_mask(tmp, BRW_WRITEMASK_XYZ),
-             0,
              src0,
              src_swizzle1(src_reg_from_dst(tmp), W),
              src_undef());
@@ -899,43 +609,30 @@ static void precalc_txp( struct brw_wm_compile *c,
 
 static void emit_fb_write( struct brw_wm_compile *c )
 {
-   struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
-   struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
-   struct prog_src_register outcolor;
+   struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH);
+   struct ureg_src outdepth = src_reg(TGSI_FILE_OUTPUT, FRAG_RESULT_DEPTH);
+   struct ureg_src outcolor;
+   struct prog_instruction *inst;
    GLuint i;
 
-   struct prog_instruction *inst, *last_inst;
-   struct brw_context *brw = c->func.brw;
 
    /* The inst->Aux field is used for FB write target and the EOT marker */
 
-   if (brw->state.nr_color_regions > 1) {
-      for (i = 0 ; i < brw->state.nr_color_regions; i++) {
-         outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
-         last_inst = inst = emit_op(c,
-                                    WM_FB_WRITE, dst_mask(dst_undef(),0), 0,
-                                    outcolor, payload_r0_depth, outdepth);
-         inst->Aux = (i<<1);
-         if (c->fp_fragcolor_emitted) {
-            outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
-            last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
-                                       0, outcolor, payload_r0_depth, outdepth);
-            inst->Aux = (i<<1);
-         }
-      }
-      last_inst->Aux |= 1; //eot
-   }
-   else {
-      /* if gl_FragData[0] is written, use it, else use gl_FragColor */
-      if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
-         outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
-      else 
-         outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
-
-      inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
-                     0, outcolor, payload_r0_depth, outdepth);
-      inst->Aux = 1|(0<<1);
+   for (i = 0 ; i < c->key.nr_cbufs; i++) {
+      outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i);
+
+      inst = emit_op(c, WM_FB_WRITE,
+                    dst_mask(dst_undef(), 0),
+                    outcolor,
+                    payload_r0_depth,
+                    outdepth);
+
+      inst->Aux = (i<<1);
    }
+   /* Set EOT flag on last inst:
+    */
+   inst->Aux |= 1; //eot
 }
 
 
@@ -952,7 +649,7 @@ static void validate_src_regs( struct brw_wm_compile *c,
    GLuint i;
 
    for (i = 0; i < nr_args; i++) {
-      if (inst->SrcReg[i].File == PROGRAM_INPUT) {
+      if (inst->SrcReg[i].File == TGSI_FILE_INPUT) {
         GLuint idx = inst->SrcReg[i].Index;
         if (!(c->fp_interp_emitted & (1<<idx))) {
            emit_interp(c, idx);
@@ -965,34 +662,86 @@ static void validate_src_regs( struct brw_wm_compile *c,
 static void validate_dst_regs( struct brw_wm_compile *c,
                               const struct prog_instruction *inst )
 {
-   if (inst->DstReg.File == PROGRAM_OUTPUT) {
+   if (inst->DstReg.File == TGSI_FILE_OUTPUT) {
       GLuint idx = inst->DstReg.Index;
       if (idx == FRAG_RESULT_COLOR)
-         c->fp_fragcolor_emitted = 1;
+         c->fp_fragcolor_emitted |= inst->DstReg.WriteMask;
    }
 }
 
-static void print_insns( const struct prog_instruction *insn,
-                        GLuint nr )
+
+
+static void emit_insn( struct brw_wm_compile *c,
+                      const struct tgsi_full_instruction *inst )
 {
-   GLuint i;
-   for (i = 0; i < nr; i++, insn++) {
-      debug_printf("%3d: ", i);
-      if (insn->Opcode < MAX_OPCODE)
-        _mesa_print_instruction(insn);
-      else if (insn->Opcode < MAX_WM_OPCODE) {
-        GLuint idx = insn->Opcode - MAX_OPCODE;
-
-        _mesa_print_alu_instruction(insn,
-                                    wm_opcode_strings[idx],
-                                    3);
-      }
-      else 
-        debug_printf("965 Opcode %d\n", insn->Opcode);
+
+   switch (inst->Opcode) {
+   case TGSI_OPCODE_ABS:
+      emit_op1(c, TGSI_OPCODE_MOV,
+              dst, 
+              brw_abs(src[0]));
+      break;
+
+   case TGSI_OPCODE_SUB: 
+      emit_op2(c, TGSI_OPCODE_ADD,
+              dst,
+              src[0],
+              brw_negate(src[1]));
+      break;
+
+   case TGSI_OPCODE_SCS: 
+      emit_op1(c, TGSI_OPCODE_SCS,
+              brw_writemask(dst, BRW_WRITEMASK_XY),
+              src[0]);
+      break;
+        
+   case TGSI_OPCODE_DST:
+      precalc_dst(c, inst);
+      break;
+
+   case TGSI_OPCODE_LIT:
+      precalc_lit(c, inst);
+      break;
+
+   case TGSI_OPCODE_TEX:
+      precalc_tex(c, inst);
+      break;
+
+   case TGSI_OPCODE_TXP:
+      precalc_txp(c, inst);
+      break;
+
+   case TGSI_OPCODE_TXB:
+      out = emit_insn(c, inst);
+      out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
+      assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
+      break;
+
+   case TGSI_OPCODE_XPD: 
+      emit_op2(c, TGSI_OPCODE_XPD,
+              brw_writemask(dst, BRW_WRITEMASK_XYZ),
+              src[0], 
+              src[1]);
+      break;
+
+   case TGSI_OPCODE_KIL: 
+      emit_op1(c, TGSI_OPCODE_KIL,
+              brw_writemask(dst_undef(), 0),
+              src[0]);
+      break;
+
+   case TGSI_OPCODE_END:
+      emit_fb_write(c);
+      break;
+   default:
+      if (brw_wm_is_scalar_result(inst->Opcode))
+        emit_scalar_insn(c, opcode, dst, src[0], src[1], src[2]);
+      else
+        emit_op(c, opcode, dst, src[0], src[1], src[2]);
+      break;
    }
 }
 
-
 /**
  * Initial pass for fragment program code generation.
  * This function is used by both the GLSL and non-GLSL paths.
@@ -1004,108 +753,62 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
 
    if (BRW_DEBUG & DEBUG_WM) {
       debug_printf("pre-fp:\n");
-      _mesa_print_program(&fp->program.Base); 
-      debug_printf("\n");
+      tgsi_dump(fp->tokens, 0); 
    }
 
-   c->pixel_xy = src_undef();
-   c->delta_xy = src_undef();
-   c->pixel_w = src_undef();
+   c->pixel_xy = brw_src_undef();
+   c->delta_xy = brw_src_undef();
+   c->pixel_w = brw_src_undef();
    c->nr_fp_insns = 0;
    c->fp->tex_units_used = 0x0;
 
-   /* Emit preamble instructions.  This is where special instructions such as
-    * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
-    * compute shader inputs from varying vars.
-    */
-   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
-      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
-      validate_src_regs(c, inst);
-      validate_dst_regs(c, inst);
-   }
 
    /* Loop over all instructions doing assorted simplifications and
     * transformations.
     */
-   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
-      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
-      struct prog_instruction *out;
-
-      /* Check for INPUT values, emit INTERP instructions where
-       * necessary:
-       */
-
-      switch (inst->Opcode) {
-      case TGSI_OPCODE_ABS:
-        out = emit_insn(c, inst);
-        out->Opcode = TGSI_OPCODE_MOV;
-        out->SrcReg[0].Negate = NEGATE_NONE;
-        out->SrcReg[0].Abs = 1;
-        break;
-
-      case TGSI_OPCODE_SUB: 
-        out = emit_insn(c, inst);
-        out->Opcode = TGSI_OPCODE_ADD;
-        out->SrcReg[1].Negate ^= NEGATE_XYZW;
-        break;
-
-      case TGSI_OPCODE_SCS: 
-        out = emit_insn(c, inst);
-        /* This should probably be done in the parser. 
+   tgsi_parse_init( &parse, tokens );
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+        /* If branching shader, emit preamble instructions at decl time, as
+         * instruction order in the shader does not correspond to the order
+         * instructions are executed in the wild.
+         *
+         * This is where special instructions such as WM_CINTERP,
+         * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to compute
+         * shader inputs from varying vars.
+         *
+         * XXX: For non-branching shaders, consider deferring variable
+         * initialization as late as possible to minimize register
+         * usage.  This is how the original BRW driver worked.
          */
-        out->DstReg.WriteMask &= BRW_WRITEMASK_XY;
-        break;
-        
-      case TGSI_OPCODE_DST:
-        precalc_dst(c, inst);
-        break;
-
-      case TGSI_OPCODE_LIT:
-        precalc_lit(c, inst);
-        break;
-
-      case TGSI_OPCODE_TEX:
-        precalc_tex(c, inst);
-        break;
-
-      case TGSI_OPCODE_TXP:
-        precalc_txp(c, inst);
-        break;
-
-      case TGSI_OPCODE_TXB:
-        out = emit_insn(c, inst);
-        out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
-         assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
-        break;
-
-      case TGSI_OPCODE_XPD: 
-        out = emit_insn(c, inst);
-        /* This should probably be done in the parser. 
+        validate_src_regs(c, inst);
+        validate_dst_regs(c, inst);
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+        /* Unlike VS programs we can probably manage fine encoding
+         * immediate values directly into the emitted EU
+         * instructions, as we probably only need to reference one
+         * float value per instruction.  Just save the data for now
+         * and use directly later.
          */
-        out->DstReg.WriteMask &= BRW_WRITEMASK_XYZ;
         break;
 
-      case TGSI_OPCODE_KIL: 
-        out = emit_insn(c, inst);
-        /* This should probably be done in the parser. 
-         */
-        out->DstReg.WriteMask = 0;
-        break;
-      case TGSI_OPCODE_END:
-        emit_fb_write(c);
-        break;
-      default:
-        if (brw_wm_is_scalar_result(inst->Opcode))
-           emit_scalar_insn(c, inst);
-        else
-           emit_insn(c, inst);
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         inst = &parse.FullToken.FullInstruction;
+        emit_insn( c, inst );
         break;
       }
    }
 
+   c->brw_program = brw_finalize( c->builder );
+
    if (BRW_DEBUG & DEBUG_WM) {
       debug_printf("pass_fp:\n");
-      print_insns( c->prog_instructions, c->nr_fp_insns );
+      brw_print_program( c->brw_program );
       debug_printf("\n");
    }
 }
index 59bc4ef701e3ba3f302e0f6413c728425108ef32..cdc10484a6ebb5396dbaa74db0b8097532fc950a 100644 (file)
@@ -332,7 +332,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
          for (j = 0; j < 4; j++)
             set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg);
        }
-       if (c->key.vp_outputs_written & (1 << i)) {
+       if (c->key.nr_vp_outputs > i) {
          reg_index += 2;
        }
     }
@@ -1670,7 +1670,7 @@ get_argument_regs(struct brw_wm_compile *c,
     }
 }
 
-static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
+static void brw_wm_emit_branching_shader(struct brw_context *brw, struct brw_wm_compile *c)
 {
 #define MAX_IF_DEPTH 32
 #define MAX_LOOP_DEPTH 32
@@ -1943,20 +1943,20 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
  * Do GPU code generation for shaders that use GLSL features such as
  * flow control.  Other shaders will be compiled with the 
  */
-void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
+void brw_wm_branching_shader_emit(struct brw_context *brw, struct brw_wm_compile *c)
 {
     if (BRW_DEBUG & DEBUG_WM) {
-        debug_printf("brw_wm_glsl_emit:\n");
+       debug_printf("%s:\n", __FUNCTION__);
     }
 
     /* initial instruction translation/simplification */
     brw_wm_pass_fp(c);
 
     /* actual code generation */
-    brw_wm_emit_glsl(brw, c);
+    brw_wm_emit_branching_shader(brw, c);
 
     if (BRW_DEBUG & DEBUG_WM) {
-        brw_wm_print_program(c, "brw_wm_glsl_emit done");
+        brw_wm_print_program(c, "brw_wm_branching_shader_emit done");
     }
 
     c->prog_data.total_grf = num_grf_used(c);
index 71e4c56835618a4b7f74c47c21791b75cb73c61b..d8b9028927ffb63521dd2f7b584b99e48abfb687 100644 (file)
@@ -168,54 +168,20 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
 
    if (!ref) {
       switch (file) {
-      case PROGRAM_INPUT:
-      case PROGRAM_PAYLOAD:
-      case PROGRAM_TEMPORARY:
-      case PROGRAM_OUTPUT:
-      case PROGRAM_VARYING:
+      case TGSI_FILE_INPUT:
+      case TGSI_FILE_TEMPORARY:
+      case TGSI_FILE_OUTPUT:
+      case BRW_FILE_PAYLOAD:
+        /* should already be done?? */
         break;
 
-      case PROGRAM_LOCAL_PARAM:
-        ref = get_param_ref(c, &c->fp->program.Base.LocalParams[idx][component]);
-        break;
-
-      case PROGRAM_ENV_PARAM:
+      case TGSI_FILE_CONSTANT:
         ref = get_param_ref(c, &c->env_param[idx][component]);
         break;
 
-      case PROGRAM_STATE_VAR:
-      case PROGRAM_UNIFORM:
-      case PROGRAM_CONSTANT:
-      case PROGRAM_NAMED_PARAM: {
-        struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters;
-        
-        /* There's something really hokey about parameters parsed in
-         * arb programs - they all end up in here, whether they be
-         * state values, parameters or constants.  This duplicates the
-         * structure above & also seems to subvert the limits set for
-         * each type of constant/param.
-         */ 
-        switch (plist->Parameters[idx].Type) {
-        case PROGRAM_NAMED_PARAM:
-        case PROGRAM_CONSTANT:
-           /* These are invarient:
-            */
-           ref = get_imm_ref(c, &plist->ParameterValues[idx][component]);
-           break;
-
-        case PROGRAM_STATE_VAR:
-        case PROGRAM_UNIFORM:
-           /* These may change from run to run:
-            */
-           ref = get_param_ref(c, &plist->ParameterValues[idx][component] );
-           break;
-
-        default:
-           assert(0);
-           break;
-        }
+      case TGSI_FILE_IMMEDIATE:
+        ref = get_imm_ref(c, &plist->ParameterValues[idx][component]);
         break;
-      }
 
       default:
         assert(0);
@@ -310,17 +276,16 @@ translate_insn(struct brw_wm_compile *c,
                const struct prog_instruction *inst)
 {
    struct brw_wm_instruction *out = get_instruction(c);
-   GLuint writemask = inst->DstReg.WriteMask;
+   GLuint writemask = inst->dst.WriteMask;
    GLuint nr_args = brw_wm_nr_args(inst->Opcode);
    GLuint i, j;
 
    /* Copy some data out of the instruction
     */
    out->opcode = inst->Opcode;
-   out->saturate = (inst->SaturateMode != SATURATE_OFF);
+   out->saturate = inst->dst.Saturate;
    out->tex_unit = inst->TexSrcUnit;
-   out->tex_idx = inst->TexSrcTarget;
-   out->tex_shadow = inst->TexShadow;
+   out->tex_target = inst->TexSrcTarget;
    out->eot = inst->Aux & 1;
    out->target = inst->Aux >> 1;
 
@@ -328,7 +293,7 @@ translate_insn(struct brw_wm_compile *c,
     */
    for (i = 0; i < nr_args; i++) {
       for (j = 0; j < 4; j++) {
-        out->src[i][j] = get_new_ref(c, inst->SrcReg[i], j, out);
+        out->src[i][j] = get_new_ref(c, inst->src[i], j, out);
       }
    }
 
@@ -380,15 +345,6 @@ static void pass0_init_payload( struct brw_wm_compile *c )
                             &c->payload.depth[j] );
    }
 
-#if 0
-   /* This seems to be an alternative to the INTERP_WPOS stuff I do
-    * elsewhere:
-    */
-   if (c->key.source_depth_reg)
-      pass0_set_fpreg_value(c, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, 2,
-                           &c->payload.depth[c->key.source_depth_reg/2]);
-#endif
-   
    for (i = 0; i < FRAG_ATTRIB_MAX; i++)
       pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, i, 0, 
                             &c->payload.input_interp[i] );      
@@ -403,6 +359,9 @@ static void pass0_init_payload( struct brw_wm_compile *c )
  * the same number.
  *
  * Translate away swizzling and eliminate non-saturating moves.
+ *
+ * Translate instructions from Mesa's prog_instruction structs to our
+ * internal brw_wm_instruction representation.
  */
 void brw_wm_pass0( struct brw_wm_compile *c )
 {
@@ -421,7 +380,7 @@ void brw_wm_pass0( struct brw_wm_compile *c )
        */      
       switch (inst->Opcode) {
       case OPCODE_MOV: 
-        if (!inst->SaturateMode) {
+        if (!inst->dst.Saturate) {
            pass0_precalc_mov(c, inst);
         }
         else {
index 85a3a55ca4f3ecff2ea5d415648b226cb2c6ad11..b0356b1bd59e0666615288515dba2aa1594a8551 100644 (file)
@@ -90,17 +90,24 @@ static void track_arg(struct brw_wm_compile *c,
 static GLuint get_texcoord_mask( GLuint tex_idx )
 {
    switch (tex_idx) {
-   case TEXTURE_1D_INDEX:
+   case TGSI_TEXTURE_1D:
       return BRW_WRITEMASK_X;
-   case TEXTURE_2D_INDEX:
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
       return BRW_WRITEMASK_XY;
-   case TEXTURE_3D_INDEX:
+   case TGSI_TEXTURE_3D:
       return BRW_WRITEMASK_XYZ;
-   case TEXTURE_CUBE_INDEX:
+   case TGSI_TEXTURE_CUBE:
       return BRW_WRITEMASK_XYZ;
-   case TEXTURE_RECT_INDEX:
-      return BRW_WRITEMASK_XY;
-   default: return 0;
+
+   case TGSI_TEXTURE_SHADOW1D:
+      return BRW_WRITEMASK_XZ;
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_SHADOWRECT:
+      return BRW_WRITEMASK_XYZ;
+   default: 
+      assert(0);
+      return 0;
    }
 }
 
@@ -217,14 +224,9 @@ void brw_wm_pass1( struct brw_wm_compile *c )
       case TGSI_OPCODE_TEX:
       case TGSI_OPCODE_TXP:
         read0 = get_texcoord_mask(inst->tex_idx);
-
-         if (inst->tex_shadow)
-           read0 |= BRW_WRITEMASK_Z;
         break;
 
       case TGSI_OPCODE_TXB:
-        /* Shadow ignored for txb.
-         */
         read0 = get_texcoord_mask(inst->tex_idx) | BRW_WRITEMASK_W;
         break;
 
index edabf6ceb6ab407fcb170442c081c107377e61eb..1898f38cef1217aabeacd328ed6976fb466d4738 100644 (file)
@@ -52,7 +52,7 @@ struct brw_wm_unit_key {
    unsigned int max_threads;
 
    unsigned int nr_surfaces, sampler_count;
-   GLboolean uses_depth, computes_depth, uses_kill, is_glsl;
+   GLboolean uses_depth, computes_depth, uses_kill, has_flow_control;
    GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable;
    GLfloat offset_units, offset_factor;
 };
@@ -114,10 +114,10 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
 
    /* _NEW_COLOR */
    key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
-   key->is_glsl = bfp->isGLSL;
+   key->has_flow_control = bfp->has_flow_control;
 
    /* temporary sanity check assertion */
-   ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp));
+   ASSERT(bfp->has_flow_control == brw_wm_has_flow_control(fp));
 
    /* _NEW_QUERY */
    key->stats_wm = (brw->query.stats_wm != 0);
@@ -184,7 +184,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
    wm.wm5.program_computes_depth = key->computes_depth;
    wm.wm5.program_uses_killpixel = key->uses_kill;
 
-   if (key->is_glsl)
+   if (key->has_flow_control)
       wm.wm5.enable_8_pix = 1;
    else
       wm.wm5.enable_16_pix = 1;