gallium: Add a new clip_halfz rasterizer state.
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_ureg.c
index 02de12d77d50d1bda18fed5b275b3c9a36818b57..782f4495fa3b299261977e7673fd649f6435ffa4 100644 (file)
@@ -1,6 +1,6 @@
 /**************************************************************************
  * 
- * Copyright 2009 VMware, Inc.
+ * Copyright 2009-2010 VMware, Inc.
  * All Rights Reserved.
  * 
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -36,6 +36,7 @@
 #include "util/u_debug.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
+#include "util/u_bitmask.h"
 
 union tgsi_any_token {
    struct tgsi_header header;
@@ -46,14 +47,19 @@ union tgsi_any_token {
    struct tgsi_declaration decl;
    struct tgsi_declaration_range decl_range;
    struct tgsi_declaration_dimension decl_dim;
+   struct tgsi_declaration_interp decl_interp;
    struct tgsi_declaration_semantic decl_semantic;
+   struct tgsi_declaration_sampler_view decl_sampler_view;
+   struct tgsi_declaration_array array;
    struct tgsi_immediate imm;
    union  tgsi_immediate_data imm_data;
    struct tgsi_instruction insn;
    struct tgsi_instruction_predicate insn_predicate;
    struct tgsi_instruction_label insn_label;
    struct tgsi_instruction_texture insn_texture;
+   struct tgsi_texture_offset insn_texture_offset;
    struct tgsi_src_register src;
+   struct tgsi_ind_register ind;
    struct tgsi_dimension dim;
    struct tgsi_dst_register dst;
    unsigned value;
@@ -69,12 +75,12 @@ struct ureg_tokens {
 
 #define UREG_MAX_INPUT PIPE_MAX_ATTRIBS
 #define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS
-#define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS
+#define UREG_MAX_OUTPUT PIPE_MAX_SHADER_OUTPUTS
 #define UREG_MAX_CONSTANT_RANGE 32
-#define UREG_MAX_IMMEDIATE 32
-#define UREG_MAX_TEMP 256
+#define UREG_MAX_IMMEDIATE 256
 #define UREG_MAX_ADDR 2
 #define UREG_MAX_PRED 1
+#define UREG_MAX_ARRAY_TEMPS 256
 
 struct const_decl {
    struct {
@@ -120,6 +126,7 @@ struct ureg_program
    struct {
       unsigned semantic_name;
       unsigned semantic_index;
+      unsigned usage_mask; /* = TGSI_WRITEMASK_* */
    } output[UREG_MAX_OUTPUT];
    unsigned nr_outputs;
 
@@ -137,9 +144,24 @@ struct ureg_program
    struct ureg_src sampler[PIPE_MAX_SAMPLERS];
    unsigned nr_samplers;
 
-   unsigned temps_active[UREG_MAX_TEMP / 32];
+   struct {
+      unsigned index;
+      unsigned target;
+      unsigned return_type_x;
+      unsigned return_type_y;
+      unsigned return_type_z;
+      unsigned return_type_w;
+   } sampler_view[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   unsigned nr_sampler_views;
+
+   struct util_bitmask *free_temps;
+   struct util_bitmask *local_temps;
+   struct util_bitmask *decl_temps;
    unsigned nr_temps;
 
+   unsigned array_temps[UREG_MAX_ARRAY_TEMPS];
+   unsigned nr_array_temps;
+
    struct const_decl const_decls;
    struct const_decl const_decls2D[PIPE_MAX_CONSTANT_BUFFERS];
 
@@ -149,6 +171,7 @@ struct ureg_program
    unsigned char property_fs_coord_origin; /* = TGSI_FS_COORD_ORIGIN_* */
    unsigned char property_fs_coord_pixel_center; /* = TGSI_FS_COORD_PIXEL_CENTER_* */
    unsigned char property_fs_color0_writes_all_cbufs; /* = TGSI_FS_COLOR0_WRITES_ALL_CBUFS * */
+   unsigned char property_fs_depth_layout; /* TGSI_FS_DEPTH_LAYOUT */
 
    unsigned nr_addrs;
    unsigned nr_preds;
@@ -245,6 +268,7 @@ ureg_dst_register( unsigned file,
    dst.PredSwizzleZ = TGSI_SWIZZLE_Z;
    dst.PredSwizzleW = TGSI_SWIZZLE_W;
    dst.Index     = index;
+   dst.ArrayID = 0;
 
    return dst;
 }
@@ -292,6 +316,13 @@ ureg_property_fs_color0_writes_all_cbufs(struct ureg_program *ureg,
    ureg->property_fs_color0_writes_all_cbufs = fs_color0_writes_all_cbufs;
 }
 
+void /* Record the fragment-shader depth layout property on the program being built. */
+ureg_property_fs_depth_layout(struct ureg_program *ureg,
+                              unsigned fs_depth_layout) /* stored in an unsigned char field of ureg_program */
+{
+   ureg->property_fs_depth_layout = fs_depth_layout; /* emit_decls() emits TGSI_PROPERTY_FS_DEPTH_LAYOUT only when this is non-zero */
+}
+
 struct ureg_src
 ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg,
                        unsigned semantic_name,
@@ -376,21 +407,27 @@ ureg_DECL_system_value(struct ureg_program *ureg,
 
 
 struct ureg_dst 
-ureg_DECL_output( struct ureg_program *ureg,
-                  unsigned name,
-                  unsigned index )
+ureg_DECL_output_masked( struct ureg_program *ureg,
+                         unsigned name,
+                         unsigned index,
+                         unsigned usage_mask )
 {
    unsigned i;
 
+   assert(usage_mask != 0);
+
    for (i = 0; i < ureg->nr_outputs; i++) {
       if (ureg->output[i].semantic_name == name &&
-          ureg->output[i].semantic_index == index) 
+          ureg->output[i].semantic_index == index) { 
+         ureg->output[i].usage_mask |= usage_mask;
          goto out;
+      }
    }
 
    if (ureg->nr_outputs < UREG_MAX_OUTPUT) {
       ureg->output[i].semantic_name = name;
       ureg->output[i].semantic_index = index;
+      ureg->output[i].usage_mask = usage_mask;
       ureg->nr_outputs++;
    }
    else {
@@ -402,6 +439,15 @@ out:
 }
 
 
+struct ureg_dst /* Declare an output for semantic (name, index) that uses all four components. */
+ureg_DECL_output( struct ureg_program *ureg,
+                  unsigned name,
+                  unsigned index )
+{
+   return ureg_DECL_output_masked(ureg, name, index, TGSI_WRITEMASK_XYZW); /* thin wrapper: same as the masked variant with a full XYZW usage mask */
+}
+
+
 /* Returns a new constant register.  Keep track of which have been
  * referred to so that we can emit decls later.
  *
@@ -493,43 +539,79 @@ out:
    return ureg_src_register(TGSI_FILE_CONSTANT, index);
 }
 
-
-/* Allocate a new temporary.  Temporaries greater than UREG_MAX_TEMP
- * are legal, but will not be released.
- */
-struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg )
+static struct ureg_dst alloc_temporary( struct ureg_program *ureg, /* Return a TEMPORARY register whose local flag equals 'local', reusing a released one when possible. */
+                                        boolean local )
 {
    unsigned i;
 
-   for (i = 0; i < UREG_MAX_TEMP; i += 32) {
-      int bit = ffs(~ureg->temps_active[i/32]);
-      if (bit != 0) {
-         i += bit - 1;
-         goto out;
-      }
+   /* Look for a released temporary.
+    */
+   for (i = util_bitmask_get_first_index(ureg->free_temps);
+        i != UTIL_BITMASK_INVALID_INDEX;
+        i = util_bitmask_get_next_index(ureg->free_temps, i + 1)) {
+      if (util_bitmask_get(ureg->local_temps, i) == local) /* only reuse a temp that was declared with the same local flag */
+         break;
    }
 
-   /* No reusable temps, so allocate a new one:
+   /* Or allocate a new one.
     */
-   i = ureg->nr_temps++;
+   if (i == UTIL_BITMASK_INVALID_INDEX) { /* the search above found nothing reusable */
+      i = ureg->nr_temps++;
 
-out:
-   if (i < UREG_MAX_TEMP)
-      ureg->temps_active[i/32] |= 1 << (i % 32);
+      if (local)
+         util_bitmask_set(ureg->local_temps, i);
+
+      /* Start a new declaration when the local flag changes */
+      if (!i || util_bitmask_get(ureg->local_temps, i - 1) != local)
+         util_bitmask_set(ureg->decl_temps, i);
+   }
 
-   if (i >= ureg->nr_temps)
-      ureg->nr_temps = i + 1;
+   util_bitmask_clear(ureg->free_temps, i); /* the chosen temp is in use until ureg_release_temporary() sets this bit again */
 
    return ureg_dst_register( TGSI_FILE_TEMPORARY, i );
 }
 
+struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg ) /* Allocate (or reuse) a temporary whose local flag is clear. */
+{
+   return alloc_temporary(ureg, FALSE);
+}
+
+struct ureg_dst ureg_DECL_local_temporary( struct ureg_program *ureg ) /* Allocate (or reuse) a temporary whose local flag is set. */
+{
+   return alloc_temporary(ureg, TRUE);
+}
+
+struct ureg_dst ureg_DECL_array_temporary( struct ureg_program *ureg, /* Reserve 'size' consecutive TEMPORARY registers and return a dst for the first one. */
+                                           unsigned size,
+                                           boolean local )
+{
+   unsigned i = ureg->nr_temps; /* arrays are always appended at the end; free_temps is not consulted */
+   struct ureg_dst dst = ureg_dst_register( TGSI_FILE_TEMPORARY, i );
+
+   if (local)
+      util_bitmask_set(ureg->local_temps, i); /* only the first register of the array gets the local bit */
+
+   /* Always start a new declaration at the start */
+   util_bitmask_set(ureg->decl_temps, i);
+
+   ureg->nr_temps += size;
+
+   /* and also at the end of the array */
+   util_bitmask_set(ureg->decl_temps, ureg->nr_temps);
+
+   if (ureg->nr_array_temps < UREG_MAX_ARRAY_TEMPS) {
+      ureg->array_temps[ureg->nr_array_temps++] = i; /* remember the first register of this array */
+      dst.ArrayID = ureg->nr_array_temps; /* 1-based position in array_temps[]; stays 0 once UREG_MAX_ARRAY_TEMPS arrays are recorded */
+   }
+
+   return dst;
+}
 
 void ureg_release_temporary( struct ureg_program *ureg,
                              struct ureg_dst tmp )
 {
    if(tmp.File == TGSI_FILE_TEMPORARY)
-      if (tmp.Index < UREG_MAX_TEMP)
-         ureg->temps_active[tmp.Index/32] &= ~(1 << (tmp.Index % 32));
+      util_bitmask_set(ureg->free_temps, tmp.Index); /* flag the temp as reusable; alloc_temporary() scans free_temps for candidates */
 }
 
 
@@ -578,6 +660,41 @@ struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg,
    return ureg->sampler[0];
 }
 
+/*
+ * Declare shader sampler view 'index' (recorded once per index) and return a TGSI_FILE_SAMPLER_VIEW source register for it.
+ */
+struct ureg_src
+ureg_DECL_sampler_view(struct ureg_program *ureg,
+                       unsigned index,
+                       unsigned target,
+                       unsigned return_type_x,
+                       unsigned return_type_y,
+                       unsigned return_type_z,
+                       unsigned return_type_w)
+{
+   struct ureg_src reg = ureg_src_register(TGSI_FILE_SAMPLER_VIEW, index);
+   uint i;
+
+   for (i = 0; i < ureg->nr_sampler_views; i++) {
+      if (ureg->sampler_view[i].index == index) {
+         return reg; /* already declared: the first declaration's target and return types are kept unchanged */
+      }
+   }
+
+   if (i < PIPE_MAX_SHADER_SAMPLER_VIEWS) { /* here i == nr_sampler_views, i.e. the next free slot */
+      ureg->sampler_view[i].index = index;
+      ureg->sampler_view[i].target = target;
+      ureg->sampler_view[i].return_type_x = return_type_x;
+      ureg->sampler_view[i].return_type_y = return_type_y;
+      ureg->sampler_view[i].return_type_z = return_type_z;
+      ureg->sampler_view[i].return_type_w = return_type_w;
+      ureg->nr_sampler_views++;
+      return reg;
+   }
+
+   assert(0); /* table full: PIPE_MAX_SHADER_SAMPLER_VIEWS distinct views are already recorded */
+   return reg; /* reached only if assert() is compiled out; the view is then not recorded for declaration */
+}
 
 static int
 match_or_expand_immediate( const unsigned *v,
@@ -752,7 +869,6 @@ ureg_emit_src( struct ureg_program *ureg,
    unsigned n = 0;
 
    assert(src.File != TGSI_FILE_NULL);
-   assert(src.File != TGSI_FILE_OUTPUT);
    assert(src.File < TGSI_FILE_COUNT);
    
    out[n].value = 0;
@@ -769,35 +885,28 @@ ureg_emit_src( struct ureg_program *ureg,
    if (src.Indirect) {
       out[0].src.Indirect = 1;
       out[n].value = 0;
-      out[n].src.File = src.IndirectFile;
-      out[n].src.SwizzleX = src.IndirectSwizzle;
-      out[n].src.SwizzleY = src.IndirectSwizzle;
-      out[n].src.SwizzleZ = src.IndirectSwizzle;
-      out[n].src.SwizzleW = src.IndirectSwizzle;
-      out[n].src.Index = src.IndirectIndex;
+      out[n].ind.File = src.IndirectFile;
+      out[n].ind.Swizzle = src.IndirectSwizzle;
+      out[n].ind.Index = src.IndirectIndex;
+      out[n].ind.ArrayID = src.ArrayID;
       n++;
    }
 
    if (src.Dimension) {
+      out[0].src.Dimension = 1;
+      out[n].dim.Dimension = 0;
+      out[n].dim.Padding = 0;
       if (src.DimIndirect) {
-         out[0].src.Dimension = 1;
          out[n].dim.Indirect = 1;
-         out[n].dim.Dimension = 0;
-         out[n].dim.Padding = 0;
          out[n].dim.Index = src.DimensionIndex;
          n++;
          out[n].value = 0;
-         out[n].src.File = src.DimIndFile;
-         out[n].src.SwizzleX = src.DimIndSwizzle;
-         out[n].src.SwizzleY = src.DimIndSwizzle;
-         out[n].src.SwizzleZ = src.DimIndSwizzle;
-         out[n].src.SwizzleW = src.DimIndSwizzle;
-         out[n].src.Index = src.DimIndIndex;
+         out[n].ind.File = src.DimIndFile;
+         out[n].ind.Swizzle = src.DimIndSwizzle;
+         out[n].ind.Index = src.DimIndIndex;
+         out[n].ind.ArrayID = src.ArrayID;
       } else {
-         out[0].src.Dimension = 1;
          out[n].dim.Indirect = 0;
-         out[n].dim.Dimension = 0;
-         out[n].dim.Padding = 0;
          out[n].dim.Index = src.DimensionIndex;
       }
       n++;
@@ -821,9 +930,10 @@ ureg_emit_dst( struct ureg_program *ureg,
    assert(dst.File != TGSI_FILE_CONSTANT);
    assert(dst.File != TGSI_FILE_INPUT);
    assert(dst.File != TGSI_FILE_SAMPLER);
+   assert(dst.File != TGSI_FILE_SAMPLER_VIEW);
    assert(dst.File != TGSI_FILE_IMMEDIATE);
    assert(dst.File < TGSI_FILE_COUNT);
-   
+
    out[n].value = 0;
    out[n].dst.File = dst.File;
    out[n].dst.WriteMask = dst.WriteMask;
@@ -833,12 +943,10 @@ ureg_emit_dst( struct ureg_program *ureg,
    
    if (dst.Indirect) {
       out[n].value = 0;
-      out[n].src.File = TGSI_FILE_ADDRESS;
-      out[n].src.SwizzleX = dst.IndirectSwizzle;
-      out[n].src.SwizzleY = dst.IndirectSwizzle;
-      out[n].src.SwizzleZ = dst.IndirectSwizzle;
-      out[n].src.SwizzleW = dst.IndirectSwizzle;
-      out[n].src.Index = dst.IndirectIndex;
+      out[n].ind.File = TGSI_FILE_ADDRESS;
+      out[n].ind.Swizzle = dst.IndirectSwizzle;
+      out[n].ind.Index = dst.IndirectIndex;
+      out[n].ind.ArrayID = dst.ArrayID;
       n++;
    }
 
@@ -950,7 +1058,7 @@ ureg_fixup_label(struct ureg_program *ureg,
 void
 ureg_emit_texture(struct ureg_program *ureg,
                   unsigned extended_token,
-                  unsigned target )
+                  unsigned target, unsigned num_offsets)
 {
    union tgsi_any_token *out, *insn;
 
@@ -961,6 +1069,20 @@ ureg_emit_texture(struct ureg_program *ureg,
 
    out[0].value = 0;
    out[0].insn_texture.Texture = target;
+   out[0].insn_texture.NumOffsets = num_offsets;
+}
+
+void /* Append one texture-offset token, copied from *offset, to the instruction token stream. */
+ureg_emit_texture_offset(struct ureg_program *ureg,
+                         const struct tgsi_texture_offset *offset)
+{
+   union tgsi_any_token *out;
+
+   out = get_tokens( ureg, DOMAIN_INSN, 1); /* reserve exactly one token in the instruction domain */
+
+   out[0].value = 0; /* clear the whole token before the struct copy below */
+   out[0].insn_texture_offset = *offset;
+   
 }
 
 
@@ -1027,6 +1149,8 @@ ureg_tex_insn(struct ureg_program *ureg,
               const struct ureg_dst *dst,
               unsigned nr_dst,
               unsigned target,
+              const struct tgsi_texture_offset *texoffsets,
+              unsigned nr_offset,
               const struct ureg_src *src,
               unsigned nr_src )
 {
@@ -1059,7 +1183,10 @@ ureg_tex_insn(struct ureg_program *ureg,
                          nr_dst,
                          nr_src);
 
-   ureg_emit_texture( ureg, insn.extended_token, target );
+   ureg_emit_texture( ureg, insn.extended_token, target, nr_offset );
+
+   for (i = 0; i < nr_offset; i++)
+      ureg_emit_texture_offset( ureg, &texoffsets[i]);
 
    for (i = 0; i < nr_dst; i++)
       ureg_emit_dst( ureg, dst[i] );
@@ -1107,7 +1234,8 @@ emit_decl_semantic(struct ureg_program *ureg,
                    unsigned file,
                    unsigned index,
                    unsigned semantic_name,
-                   unsigned semantic_index)
+                   unsigned semantic_index,
+                   unsigned usage_mask)
 {
    union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
 
@@ -1115,7 +1243,7 @@ emit_decl_semantic(struct ureg_program *ureg,
    out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
    out[0].decl.NrTokens = 3;
    out[0].decl.File = file;
-   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */
+   out[0].decl.UsageMask = usage_mask;
    out[0].decl.Semantic = 1;
 
    out[1].value = 0;
@@ -1138,27 +1266,56 @@ emit_decl_fs(struct ureg_program *ureg,
              unsigned cylindrical_wrap,
              unsigned centroid)
 {
-   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
+   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 4);
 
    out[0].value = 0;
    out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
-   out[0].decl.NrTokens = 3;
+   out[0].decl.NrTokens = 4;
    out[0].decl.File = file;
    out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */
-   out[0].decl.Interpolate = interpolate;
+   out[0].decl.Interpolate = 1;
    out[0].decl.Semantic = 1;
-   out[0].decl.CylindricalWrap = cylindrical_wrap;
-   out[0].decl.Centroid = centroid;
 
    out[1].value = 0;
    out[1].decl_range.First = index;
    out[1].decl_range.Last = index;
 
    out[2].value = 0;
-   out[2].decl_semantic.Name = semantic_name;
-   out[2].decl_semantic.Index = semantic_index;
+   out[2].decl_interp.Interpolate = interpolate;
+   out[2].decl_interp.CylindricalWrap = cylindrical_wrap;
+   out[2].decl_interp.Centroid = centroid;
+
+   out[3].value = 0;
+   out[3].decl_semantic.Name = semantic_name;
+   out[3].decl_semantic.Index = semantic_index;
 }
 
+static void /* Emit a TEMPORARY declaration covering registers [first, last]; a non-zero arrayid appends an array token. */
+emit_decl_temps( struct ureg_program *ureg,
+                 unsigned first, unsigned last,
+                 boolean local,
+                 unsigned arrayid )
+{
+   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL,
+                                           arrayid ? 3 : 2 ); /* header + range, plus one array token when arrayid != 0 */
+
+   out[0].value = 0;
+   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
+   out[0].decl.NrTokens = arrayid ? 3 : 2; /* must match the token count reserved above, as the other emit_decl_* helpers do */
+   out[0].decl.File = TGSI_FILE_TEMPORARY;
+   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
+   out[0].decl.Local = local;
+
+   out[1].value = 0;
+   out[1].decl_range.First = first;
+   out[1].decl_range.Last = last; /* inclusive: the caller in emit_decls() passes the index of the last register */
+
+   if (arrayid) {
+      out[0].decl.Array = 1; /* flags that an array token follows the range token */
+      out[2].value = 0;
+      out[2].array.ArrayID = arrayid;
+   }
+}
 
 static void emit_decl_range( struct ureg_program *ureg,
                              unsigned file,
@@ -1172,7 +1329,6 @@ static void emit_decl_range( struct ureg_program *ureg,
    out[0].decl.NrTokens = 2;
    out[0].decl.File = file;
    out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
-   out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT;
    out[0].decl.Semantic = 0;
 
    out[1].value = 0;
@@ -1194,7 +1350,6 @@ emit_decl_range2D(struct ureg_program *ureg,
    out[0].decl.NrTokens = 3;
    out[0].decl.File = file;
    out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
-   out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT;
    out[0].decl.Dimension = 1;
 
    out[1].value = 0;
@@ -1205,6 +1360,35 @@ emit_decl_range2D(struct ureg_program *ureg,
    out[2].decl_dim.Index2D = index2D;
 }
 
+static void /* Emit a three-token SAMPLER_VIEW declaration (header, range, sampler-view info) for a single index. */
+emit_decl_sampler_view(struct ureg_program *ureg,
+                       unsigned index,
+                       unsigned target,
+                       unsigned return_type_x,
+                       unsigned return_type_y,
+                       unsigned return_type_z,
+                       unsigned return_type_w )
+{
+   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
+
+   out[0].value = 0;
+   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
+   out[0].decl.NrTokens = 3;
+   out[0].decl.File = TGSI_FILE_SAMPLER_VIEW;
+   out[0].decl.UsageMask = 0xf; /* NOTE(review): presumably the same value as TGSI_WRITEMASK_XYZW used by the other emitters - confirm */
+
+   out[1].value = 0;
+   out[1].decl_range.First = index; /* First == Last: the declaration covers exactly one view */
+   out[1].decl_range.Last = index;
+
+   out[2].value = 0;
+   out[2].decl_sampler_view.Resource    = target; /* the 'target' argument is stored in the Resource field */
+   out[2].decl_sampler_view.ReturnTypeX = return_type_x;
+   out[2].decl_sampler_view.ReturnTypeY = return_type_y;
+   out[2].decl_sampler_view.ReturnTypeZ = return_type_z;
+   out[2].decl_sampler_view.ReturnTypeW = return_type_w;
+}
+
 static void
 emit_immediate( struct ureg_program *ureg,
                 const unsigned *v,
@@ -1292,6 +1476,14 @@ static void emit_decls( struct ureg_program *ureg )
                     ureg->property_fs_color0_writes_all_cbufs);
    }
 
+   if (ureg->property_fs_depth_layout) {
+      assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT);
+
+      emit_property(ureg,
+                    TGSI_PROPERTY_FS_DEPTH_LAYOUT,
+                    ureg->property_fs_depth_layout);
+   }
+
    if (ureg->processor == TGSI_PROCESSOR_VERTEX) {
       for (i = 0; i < UREG_MAX_INPUT; i++) {
          if (ureg->vs_inputs[i/32] & (1 << (i%32))) {
@@ -1315,7 +1507,8 @@ static void emit_decls( struct ureg_program *ureg )
                             TGSI_FILE_INPUT,
                             ureg->gs_input[i].index,
                             ureg->gs_input[i].semantic_name,
-                            ureg->gs_input[i].semantic_index);
+                            ureg->gs_input[i].semantic_index,
+                            TGSI_WRITEMASK_XYZW);
       }
    }
 
@@ -1324,7 +1517,8 @@ static void emit_decls( struct ureg_program *ureg )
                          TGSI_FILE_SYSTEM_VALUE,
                          ureg->system_value[i].index,
                          ureg->system_value[i].semantic_name,
-                         ureg->system_value[i].semantic_index);
+                         ureg->system_value[i].semantic_index,
+                         TGSI_WRITEMASK_XYZW);
    }
 
    for (i = 0; i < ureg->nr_outputs; i++) {
@@ -1332,7 +1526,8 @@ static void emit_decls( struct ureg_program *ureg )
                          TGSI_FILE_OUTPUT,
                          i,
                          ureg->output[i].semantic_name,
-                         ureg->output[i].semantic_index);
+                         ureg->output[i].semantic_index,
+                         ureg->output[i].usage_mask);
    }
 
    for (i = 0; i < ureg->nr_samplers; i++) {
@@ -1341,6 +1536,16 @@ static void emit_decls( struct ureg_program *ureg )
                        ureg->sampler[i].Index, 1 );
    }
 
+   for (i = 0; i < ureg->nr_sampler_views; i++) {
+      emit_decl_sampler_view(ureg,
+                             ureg->sampler_view[i].index,
+                             ureg->sampler_view[i].target,
+                             ureg->sampler_view[i].return_type_x,
+                             ureg->sampler_view[i].return_type_y,
+                             ureg->sampler_view[i].return_type_z,
+                             ureg->sampler_view[i].return_type_w);
+   }
+
    if (ureg->const_decls.nr_constant_ranges) {
       for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) {
          emit_decl_range(ureg,
@@ -1367,9 +1572,19 @@ static void emit_decls( struct ureg_program *ureg )
    }
 
    if (ureg->nr_temps) {
-      emit_decl_range( ureg,
-                       TGSI_FILE_TEMPORARY,
-                       0, ureg->nr_temps );
+      unsigned array = 0;
+      for (i = 0; i < ureg->nr_temps;) {
+         boolean local = util_bitmask_get(ureg->local_temps, i);
+         unsigned first = i;
+         i = util_bitmask_get_next_index(ureg->decl_temps, i + 1);
+         if (i == UTIL_BITMASK_INVALID_INDEX)
+            i = ureg->nr_temps;
+
+         if (array < ureg->nr_array_temps && ureg->array_temps[array] == first)
+            emit_decl_temps( ureg, first, i - 1, local, ++array );
+         else
+            emit_decl_temps( ureg, first, i - 1, local, 0 );
+      }
    }
 
    if (ureg->nr_addrs) {
@@ -1468,7 +1683,8 @@ const struct tgsi_token *ureg_finalize( struct ureg_program *ureg )
 
 
 void *ureg_create_shader( struct ureg_program *ureg,
-                          struct pipe_context *pipe )
+                          struct pipe_context *pipe,
+                          const struct pipe_stream_output_info *so )
 {
    struct pipe_shader_state state;
 
@@ -1476,6 +1692,11 @@ void *ureg_create_shader( struct ureg_program *ureg,
    if(!state.tokens)
       return NULL;
 
+   if (so)
+      state.stream_output = *so;
+   else
+      memset(&state.stream_output, 0, sizeof(state.stream_output));
+
    if (ureg->processor == TGSI_PROCESSOR_VERTEX)
       return pipe->create_vs_state( pipe, &state );
    else
@@ -1514,13 +1735,35 @@ struct ureg_program *ureg_create( unsigned processor )
 {
    struct ureg_program *ureg = CALLOC_STRUCT( ureg_program );
    if (ureg == NULL)
-      return NULL;
+      goto no_ureg;
 
    ureg->processor = processor;
    ureg->property_gs_input_prim = ~0;
    ureg->property_gs_output_prim = ~0;
    ureg->property_gs_max_vertices = ~0;
+
+   ureg->free_temps = util_bitmask_create();
+   if (ureg->free_temps == NULL)
+      goto no_free_temps;
+
+   ureg->local_temps = util_bitmask_create();
+   if (ureg->local_temps == NULL)
+      goto no_local_temps;
+
+   ureg->decl_temps = util_bitmask_create();
+   if (ureg->decl_temps == NULL)
+      goto no_decl_temps;
+
    return ureg;
+
+no_decl_temps:
+   util_bitmask_destroy(ureg->local_temps);
+no_local_temps:
+   util_bitmask_destroy(ureg->free_temps);
+no_free_temps:
+   FREE(ureg);
+no_ureg:
+   return NULL;
 }
 
 
@@ -1533,6 +1776,10 @@ void ureg_destroy( struct ureg_program *ureg )
           ureg->domain[i].tokens != error_tokens)
          FREE(ureg->domain[i].tokens);
    }
-   
+
+   util_bitmask_destroy(ureg->free_temps);
+   util_bitmask_destroy(ureg->local_temps);
+   util_bitmask_destroy(ureg->decl_temps);
+
    FREE(ureg);
 }