i965g: remove duplicate set_viewport_state, fixes samples/depth

[mesa.git] / src / gallium / drivers / i965 / brw_vs_emit.c
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c

index 5366ab85140b3df9c1ae2459728c23060ebf4c98..6d8366f8624931248981f6450bc3cfb67dd8415c 100644 (file)
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -34,15 +34,24 @@
  #include "util/u_memory.h"
  #include "util/u_math.h"
  
-#include "tgsi/tgsi_ureg.h"
-#include "tgsi/tgsi_ureg_parse.h"
+#include "tgsi/tgsi_parse.h"
  #include "tgsi/tgsi_dump.h"
  #include "tgsi/tgsi_info.h"
  
  #include "brw_context.h"
  #include "brw_vs.h"
  #include "brw_debug.h"
+#include "brw_disasm.h"
  
+/* Pick the vec4 for 'slot', with slots packed two per register starting
+ * at 'reg': slot/2 selects the register, slot%2 the sub-register (0 or 4). */
+static INLINE struct brw_reg brw_vec4_grf_repeat( GLuint reg, GLuint slot )
+{
+   int nr = reg + slot/2;
+   int subnr = (slot%2) * 4;
+
+   return stride(brw_vec4_grf(nr, subnr), 0, 4, 1);
+}
  
  
  static struct brw_reg get_tmp( struct brw_vs_compile *c )
@@ -67,6 +76,52 @@ static void release_tmps( struct brw_vs_compile *c )
  }
  
  
+static boolean is_position_output( struct brw_vs_compile *c,
+                                   unsigned vs_output )
+{
+   struct brw_vertex_shader *vs = c->vp;
+   /* TRUE only when output 'vs_output' has semantic POSITION, index 0. */
+   if (vs_output == c->prog_data.output_edgeflag) {
+      return FALSE;
+   }
+   else {
+      unsigned semantic = vs->info.output_semantic_name[vs_output];
+      unsigned index = vs->info.output_semantic_index[vs_output];
+      /* NOTE(review): edgeflag skips vs->info, presumably no entry - confirm */
+      return (semantic == TGSI_SEMANTIC_POSITION &&
+              index == 0);
+   }
+}
+
+
+static boolean find_output_slot( struct brw_vs_compile *c,
+                                  unsigned vs_output,
+                                  unsigned *fs_input_slot )
+{
+   struct brw_vertex_shader *vs = c->vp;
+   /* Map a VS output to the FS input slot with the same semantic/index. */
+   if (vs_output == c->prog_data.output_edgeflag) {
+      *fs_input_slot = c->key.fs_signature.nr_inputs; /* slot == nr_inputs */
+      return TRUE;
+   }
+   else {
+      unsigned semantic = vs->info.output_semantic_name[vs_output];
+      unsigned index = vs->info.output_semantic_index[vs_output];
+      unsigned i;
+      /* First fs_signature input matching both name and index wins. */
+      for (i = 0; i < c->key.fs_signature.nr_inputs; i++) {
+         if (c->key.fs_signature.input[i].semantic == semantic &&
+          c->key.fs_signature.input[i].semantic_index == index) {
+            *fs_input_slot = i;
+            return TRUE;
+         }
+      }
+   }
+   /* No match: report FALSE and leave *fs_input_slot untouched. */
+   return FALSE;
+}
+
+
  /**
   * Preallocate GRF register before code emit.
   * Do things as simply as possible.  Allocate and populate all regs
@@ -74,7 +129,7 @@ static void release_tmps( struct brw_vs_compile *c )
   */
  static void brw_vs_alloc_regs( struct brw_vs_compile *c )
  {
-   GLuint i, reg = 0, mrf;
+   GLuint i, reg = 0, subreg = 0, mrf;
     int attributes_in_vue;
  
     /* Determine whether to use a real constant buffer or use a block
@@ -82,11 +137,18 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
      * works if everything fits in the GRF.
      * XXX this heuristic/check may need some fine tuning...
      */
-   if (c->vp->info.file_max[TGSI_FILE_CONSTANT] +
-       c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 21 > BRW_MAX_GRF)
+   if (c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1 +
+       c->vp->info.file_max[TGSI_FILE_IMMEDIATE] + 1 +
+       c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 1 + 21 > BRW_MAX_GRF)
        c->vp->use_const_buffer = GL_TRUE;
-   else
+   else {
+      /* XXX: immediates can go elsewhere if necessary:
+       */
+      assert(c->vp->info.file_max[TGSI_FILE_IMMEDIATE] + 1 +
+            c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 1 + 21 <= BRW_MAX_GRF);
+
        c->vp->use_const_buffer = GL_FALSE;
+   }
  
     /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
  
@@ -98,33 +160,57 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
     /* User clip planes from curbe: 
      */
     if (c->key.nr_userclip) {
-      for (i = 0; i < c->key.nr_userclip; i++) {
-        c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1);
+      /* Skip over fixed planes:  Or never read them into vs unit?
+       */
+      subreg += 6;
+
+      for (i = 0; i < c->key.nr_userclip; i++, subreg++) {
+        c->userplane[i] = 
+            stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
        }     
  
        /* Deal with curbe alignment:
         */
-      reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;
+      subreg = align(subreg, 2);
+      /*reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;*/
     }
  
-   /* Vertex program parameters from curbe:
+
+   /* Immediates: always in the curbe.
+    *
+    * XXX: Can try to encode some immediates as brw immediates
+    * XXX: Make sure ureg sets minimal immediate size and respect it
+    * here.
      */
-   if (c->vp->use_const_buffer) {
-      /* get constants from a real constant buffer */
-      c->prog_data.curb_read_length = 0;
-      c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */
+   for (i = 0; i < c->vp->info.immediate_count; i++, subreg++) {
+      c->regs[TGSI_FILE_IMMEDIATE][i] = 
+         stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
     }
-   else {
-      /* use a section of the GRF for constants */
+   c->prog_data.nr_params = c->vp->info.immediate_count * 4;
+
+
+   /* Vertex constant buffer.
+    *
+    * Constants from the buffer can be either cached in the curbe or
+    * loaded as needed from the actual constant buffer.
+    */
+   if (!c->vp->use_const_buffer) {
        GLuint nr_params = c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1;
-      for (i = 0; i < nr_params; i++) {
-         c->regs[TGSI_FILE_CONSTANT][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
+
+      for (i = 0; i < nr_params; i++, subreg++) {
+         c->regs[TGSI_FILE_CONSTANT][i] =
+            stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
        }
-      reg += (nr_params + 1) / 2;
-      c->prog_data.curb_read_length = reg - 1;
-      c->prog_data.nr_params = nr_params * 4;
+
+      c->prog_data.nr_params += nr_params * 4;
     }
  
+   /* All regs allocated
+    */
+   reg += (subreg + 1) / 2;
+   c->prog_data.curb_read_length = reg - 1;
+
+
     /* Allocate input regs:  
      */
     c->nr_inputs = c->vp->info.num_inputs;
@@ -139,46 +225,55 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
     if (c->nr_inputs == 0)
        reg++;
  
+
+
     /* Allocate outputs.  The non-position outputs go straight into message regs.
      */
-   c->nr_outputs = 0;
-   c->first_output = reg;
-   c->first_overflow_output = 0;
+   c->nr_outputs = c->prog_data.nr_outputs;
  
     if (c->chipset.is_igdng)
        mrf = 8;
     else
        mrf = 4;
  
+   
+   if (c->key.fs_signature.nr_inputs > BRW_MAX_MRF) {
+      c->overflow_grf_start = reg;
+      c->overflow_count = c->key.fs_signature.nr_inputs - BRW_MAX_MRF;
+      reg += c->overflow_count;
+   }
+
     /* XXX: need to access vertex output semantics here:
      */
-   c->nr_outputs = c->prog_data.nr_outputs;
-   for (i = 0; i < c->prog_data.nr_outputs; i++) {
-      assert(i < Elements(c->regs[TGSI_FILE_OUTPUT]));
+   for (i = 0; i < c->nr_outputs; i++) {
+      unsigned slot;
  
-      /* XXX: Hardwire position to zero:
+      /* XXX: Put output position in slot zero always.  Clipper, etc,
+       * need access to this reg.
         */
-      if (i == 0) {
-        c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
+      if (is_position_output(c, i)) {
+        c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); /* copy to mrf 0 */
          reg++;
        }
-      /* XXX: disable psiz:
-       */
-      else if (0) {
-        c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
-        reg++;
-        mrf++;         /* just a placeholder?  XXX fix later stages & remove this */
-      }
-      else if (mrf < 16) {
-        c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
-        mrf++;
+      else if (find_output_slot(c, i, &slot)) {
+         
+         if (0 /* is_psize_output(c, i) */ ) {
+            /* c->psize_out.grf = reg; */
+            /* c->psize_out.mrf = i; */
+         }
+         
+         /* The first (16-4) outputs can go straight into the message regs.
+          */
+         if (slot + mrf < BRW_MAX_MRF) {
+            c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(slot + mrf);
+         }
+         else {
+            int grf = c->overflow_grf_start + slot - BRW_MAX_MRF;
+            c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(grf, 0);
+         }
        }
        else {
-        /* too many vertex results to fit in MRF, use GRF for overflow */
-        if (!c->first_overflow_output)
-           c->first_overflow_output = i;
-        c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
-        reg++;
+         c->regs[TGSI_FILE_OUTPUT][i] = brw_null_reg();
        }
     }     
  
@@ -214,15 +309,19 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
        }
     }
  
+#if 0
     for (i = 0; i < 128; i++) {
        if (c->output_regs[i].used_in_src) {
           c->output_regs[i].reg = brw_vec8_grf(reg, 0);
           reg++;
        }
     }
+#endif
  
-   c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
-   reg += 2;
+   if (c->vp->has_flow_control) {
+      c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
+      reg += 2;
+   }
  
     /* Some opcodes need an internal temporary:
      */
@@ -754,21 +853,20 @@ static void emit_nrm( struct brw_vs_compile *c,
  
  static struct brw_reg
  get_constant(struct brw_vs_compile *c,
-             const struct ureg_instruction *inst,
-             GLuint argIndex)
+            GLuint argIndex,
+            GLuint index,
+            GLboolean relAddr)
  {
-   const struct ureg_src src = inst->src[argIndex];
     struct brw_compile *p = &c->func;
     struct brw_reg const_reg;
     struct brw_reg const2_reg;
-   const GLboolean relAddr = src.Indirect;
  
     assert(argIndex < 3);
  
-   if (c->current_const[argIndex].index != src.Index || relAddr) {
+   if (c->current_const[argIndex].index != index || relAddr) {
        struct brw_reg addrReg = c->regs[TGSI_FILE_ADDRESS][0];
  
-      c->current_const[argIndex].index = src.Index;
+      c->current_const[argIndex].index = index;
  
  #if 0
        printf("  fetch const[%d] for arg %d into reg %d\n",
@@ -780,7 +878,7 @@ get_constant(struct brw_vs_compile *c,
                         0,                             /* oword */
                         relAddr,                       /* relative indexing? */
                         addrReg,                       /* address register */
-                       16 * src.Index,               /* byte offset */
+                       16 * index,               /* byte offset */
                         SURF_INDEX_VERT_CONST_BUFFER   /* binding table index */
                         );
  
@@ -797,7 +895,7 @@ get_constant(struct brw_vs_compile *c,
                            1,                       /* oword */
                            relAddr,                 /* relative indexing? */
                            addrReg,                 /* address register */
-                          16 * src.Index,         /* byte offset */
+                          16 * index,         /* byte offset */
                            SURF_INDEX_VERT_CONST_BUFFER
                            );
        }
@@ -894,12 +992,11 @@ static struct brw_reg deref( struct brw_vs_compile *c,
   */
  static struct brw_reg
  get_src_reg( struct brw_vs_compile *c,
-             const struct ureg_instruction *inst,
-             GLuint argIndex )
+            GLuint argIndex,
+            GLuint file,
+            GLint index,
+            GLboolean relAddr )
  {
-   const GLuint file = inst->src[argIndex].File;
-   const GLint index = inst->src[argIndex].Index;
-   const GLboolean relAddr = inst->src[argIndex].Indirect;
  
     switch (file) {
     case TGSI_FILE_TEMPORARY:
@@ -913,9 +1010,12 @@ get_src_reg( struct brw_vs_compile *c,
           return c->regs[file][index];
        }
  
+   case TGSI_FILE_IMMEDIATE:
+      return c->regs[file][index];
+
     case TGSI_FILE_CONSTANT:
        if (c->vp->use_const_buffer) {
-         return get_constant(c, inst, argIndex);
+         return get_constant(c, argIndex, index, relAddr);
        }
        else if (relAddr) {
           return deref(c, c->regs[TGSI_FILE_CONSTANT][0], index);
@@ -962,27 +1062,30 @@ static void emit_arl( struct brw_vs_compile *c,
   * Return the brw reg for the given instruction's src argument.
   */
  static struct brw_reg get_arg( struct brw_vs_compile *c,
-                               const struct ureg_instruction *inst,
+                               const struct tgsi_full_src_register *src,
                                 GLuint argIndex )
  {
-   const struct ureg_src src = inst->src[argIndex];
     struct brw_reg reg;
  
-   if (src.File == TGSI_FILE_NULL)
+   if (src->SrcRegister.File == TGSI_FILE_NULL)
        return brw_null_reg();
  
-   reg = get_src_reg(c, inst, argIndex);
+   reg = get_src_reg(c, argIndex,
+                    src->SrcRegister.File,
+                    src->SrcRegister.Index,
+                    src->SrcRegister.Indirect);
  
     /* Convert 3-bit swizzle to 2-bit.  
      */
-   reg.dw1.bits.swizzle = BRW_SWIZZLE4(src.SwizzleX,
-                                      src.SwizzleY,
-                                      src.SwizzleZ,
-                                      src.SwizzleW);
+   reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->SrcRegister.SwizzleX,
+                                      src->SrcRegister.SwizzleY,
+                                      src->SrcRegister.SwizzleZ,
+                                      src->SrcRegister.SwizzleW);
  
-   /* Note this is ok for non-swizzle instructions: 
+   reg.negate = src->SrcRegister.Negate ? 1 : 0;   
+
+   /* XXX: abs, absneg
      */
-   reg.negate = src.Negate ? 1 : 0;   
  
     return reg;
  }
@@ -992,19 +1095,21 @@ static struct brw_reg get_arg( struct brw_vs_compile *c,
   * Get brw register for the given program dest register.
   */
  static struct brw_reg get_dst( struct brw_vs_compile *c,
-                              struct ureg_dst dst )
+                              unsigned file,
+                              unsigned index,
+                              unsigned writemask )
  {
     struct brw_reg reg;
  
-   switch (dst.File) {
+   switch (file) {
     case TGSI_FILE_TEMPORARY:
     case TGSI_FILE_OUTPUT:
-      assert(c->regs[dst.File][dst.Index].nr != 0);
-      reg = c->regs[dst.File][dst.Index];
+      assert(c->regs[file][index].nr != 0);
+      reg = c->regs[file][index];
        break;
     case TGSI_FILE_ADDRESS:
-      assert(dst.Index == 0);
-      reg = c->regs[dst.File][dst.Index];
+      assert(index == 0);
+      reg = c->regs[file][index];
        break;
     case TGSI_FILE_NULL:
        /* we may hit this for OPCODE_END, OPCODE_KIL, etc */
@@ -1015,7 +1120,7 @@ static struct brw_reg get_dst( struct brw_vs_compile *c,
        reg = brw_null_reg();
     }
  
-   reg.dw1.bits.writemask = dst.WriteMask;
+   reg.dw1.bits.writemask = writemask;
  
     return reg;
  }
@@ -1033,13 +1138,13 @@ static void emit_vertex_write( struct brw_vs_compile *c)
     struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS];
     struct brw_reg ndc;
     int eot;
+   int i;
     GLuint len_vertext_header = 2;
  
     if (c->key.copy_edgeflag) {
-      assert(0);
        brw_MOV(p, 
-             get_reg(c, TGSI_FILE_OUTPUT, 0),
-             get_reg(c, TGSI_FILE_INPUT, 0));
+              get_reg(c, TGSI_FILE_OUTPUT, c->prog_data.output_edgeflag),
+              brw_imm_f(1));
     }
  
     /* Build ndc coords */
@@ -1128,7 +1233,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
         len_vertext_header = 2;
     }
  
-   eot = (c->first_overflow_output == 0);
+   eot = (c->overflow_count == 0);
  
     brw_urb_WRITE(p, 
                  brw_null_reg(), /* dest */
@@ -1143,19 +1248,22 @@ static void emit_vertex_write( struct brw_vs_compile *c)
                  0,             /* urb destination offset */
                  BRW_URB_SWIZZLE_INTERLEAVE);
  
-   if (c->first_overflow_output > 0) {
-      /* Not all of the vertex outputs/results fit into the MRF.
-       * Move the overflowed attributes from the GRF to the MRF and
-       * issue another brw_urb_WRITE().
-       */
+   /* Not all of the vertex outputs/results fit into the MRF.
+    * Move the overflowed attributes from the GRF to the MRF and
+    * issue another brw_urb_WRITE().
+    */
+   for (i = 0; i < c->overflow_count; i += BRW_MAX_MRF) {
+      unsigned nr = MIN2(c->overflow_count - i, BRW_MAX_MRF);
+      GLuint j;
+
+      eot = (i + nr >= c->overflow_count);
+
        /* XXX I'm not 100% sure about which MRF regs to use here.  Starting
         * at mrf[4] atm...
         */
-      GLuint i, mrf = 0;
-      for (i = c->first_overflow_output; i < c->prog_data.nr_outputs; i++) {
-        /* move from GRF to MRF */
-        brw_MOV(p, brw_message_reg(4+mrf), c->regs[TGSI_FILE_OUTPUT][i]);
-        mrf++;
+      for (j = 0; j < nr; j++) {
+        brw_MOV(p, brw_message_reg(4+j), 
+                 brw_vec8_grf(c->overflow_grf_start + i + j, 0));
        }
  
        brw_urb_WRITE(p,
@@ -1164,11 +1272,11 @@ static void emit_vertex_write( struct brw_vs_compile *c)
                      c->r0,          /* src */
                      0,              /* allocate */
                      1,              /* used */
-                    mrf+1,          /* msg len */
+                    nr+1,          /* msg len */
                      0,              /* response len */
-                    1,              /* eot */
-                    1,              /* writes complete */
-                    BRW_MAX_MRF-1,  /* urb destination offset */
+                    eot,            /* eot */
+                    eot,            /* writes complete */
+                    i-1,            /* urb destination offset */
                      BRW_URB_SWIZZLE_INTERLEAVE);
     }
  }
@@ -1199,7 +1307,7 @@ post_vs_emit( struct brw_vs_compile *c,
  }
  
  static uint32_t
-get_predicate(const struct ureg_instruction *inst)
+get_predicate(const struct tgsi_full_instruction *inst)
  {
     /* XXX: disabling for now
      */
@@ -1242,8 +1350,10 @@ get_predicate(const struct ureg_instruction *inst)
  }
  
  static void emit_insn(struct brw_vs_compile *c,
-                     const struct ureg_instruction *inst)
+                     const struct tgsi_full_instruction *inst)
  {
+   unsigned opcode = inst->Instruction.Opcode;
+   unsigned label = inst->InstructionExtLabel.Label;
     struct brw_compile *p = &c->func;
     struct brw_reg args[3], dst;
     GLuint i;
@@ -1256,20 +1366,25 @@ static void emit_insn(struct brw_vs_compile *c,
     /* Get argument regs.
      */
     for (i = 0; i < 3; i++) {
-      args[i] = get_arg(c, inst, i);
+      args[i] = get_arg(c, &inst->FullSrcRegisters[i], i);
     }
  
     /* Get dest regs.  Note that it is possible for a reg to be both
      * dst and arg, given the static allocation of registers.  So
      * care needs to be taken emitting multi-operation instructions.
      */ 
-   dst = get_dst(c, inst->dst);
+   dst = get_dst(c, 
+                inst->FullDstRegisters[0].DstRegister.File,
+                inst->FullDstRegisters[0].DstRegister.Index,
+                inst->FullDstRegisters[0].DstRegister.WriteMask);
  
-   if (inst->dst.Saturate) {
+   /* XXX: saturate
+    */
+   if (inst->Instruction.Saturate != TGSI_SAT_NONE) {
        debug_printf("Unsupported saturate in vertex shader");
     }
  
-   switch (inst->opcode) {
+   switch (opcode) {
     case TGSI_OPCODE_ABS:
        brw_MOV(p, dst, brw_abs(args[0]));
        break;
@@ -1347,7 +1462,8 @@ static void emit_insn(struct brw_vs_compile *c,
        emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
        break;
     case TGSI_OPCODE_RSQ:
-      emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
+      emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, 
+                 brw_swizzle(args[0], 0,0,0,0), BRW_MATH_PRECISION_FULL);
        break;
     case TGSI_OPCODE_SEQ:
        emit_seq(p, dst, args[0], args[1]);
@@ -1443,7 +1559,7 @@ static void emit_insn(struct brw_vs_compile *c,
        brw_set_access_mode(p, BRW_ALIGN_16);
        brw_ADD(p, get_addr_reg(c->stack_index),
               get_addr_reg(c->stack_index), brw_imm_d(4));
-      brw_save_call(p, inst->label, p->nr_insn);
+      brw_save_call(p, label, p->nr_insn);
        brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
        break;
     case TGSI_OPCODE_RET:
@@ -1468,8 +1584,8 @@ static void emit_insn(struct brw_vs_compile *c,
        break;
     default:
        debug_printf("Unsupported opcode %i (%s) in vertex shader",
-                  inst->opcode, 
-                  tgsi_get_opcode_name(inst->opcode));
+                  opcode, 
+                  tgsi_get_opcode_name(opcode));
     }
  
     /* Set the predication update on the last instruction of the native
@@ -1498,13 +1614,12 @@ static void emit_insn(struct brw_vs_compile *c,
  void brw_vs_emit(struct brw_vs_compile *c)
  {
     struct brw_compile *p = &c->func;
+   const struct tgsi_token *tokens = c->vp->tokens;
     struct brw_instruction *end_inst, *last_inst;
-   struct ureg_parse_context parse;
-   struct ureg_declaration *decl;
-   struct ureg_declaration *imm;
-   struct ureg_declaration *insn;
+   struct tgsi_parse_context parse;
+   struct tgsi_full_instruction *inst;
  
-   if (BRW_DEBUG & DEBUG_VS)
+//   if (BRW_DEBUG & DEBUG_VS)
        tgsi_dump(c->vp->tokens, 0); 
  
     c->stack_index = brw_indirect(0, 0);
@@ -1512,21 +1627,38 @@ void brw_vs_emit(struct brw_vs_compile *c)
     brw_set_compression_control(p, BRW_COMPRESSION_NONE);
     brw_set_access_mode(p, BRW_ALIGN_16);
     
+
     /* Static register allocation
      */
     brw_vs_alloc_regs(c);
-   brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack));
  
-   while (ureg_next_decl(&parse, &decl)) {
+   if (c->vp->has_flow_control) {
+      brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack));
     }
  
-   while (ureg_next_immediate(&parse, &imm)) {
-   }
-
-   while (ureg_next_instruction(&parse, &insn)) {
+   /* Instructions
+    */
+   tgsi_parse_init( &parse, tokens );
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+        break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         inst = &parse.FullToken.FullInstruction;
+        emit_insn( c, inst );
+         break;
+
+      default:
+         assert( 0 );
+      }
     }
+   tgsi_parse_free( &parse );
  
-   end_inst = &p->store[end_offset];
+   end_inst = &p->store[c->end_offset];
     last_inst = &p->store[p->nr_insn];
  
     /* The END instruction will be patched to jump to this code */
@@ -1535,11 +1667,7 @@ void brw_vs_emit(struct brw_vs_compile *c)
     post_vs_emit(c, end_inst, last_inst);
  
     if (BRW_DEBUG & DEBUG_VS) {
-      int i;
-
        debug_printf("vs-native:\n");
-      for (i = 0; i < p->nr_insn; i++)
-        brw_disasm(stderr, &p->store[i]);
-      debug_printf("\n");
+      brw_disasm(stderr, p->store, p->nr_insn);
     }
  }