tgsi: allow negation of all integer types
[mesa.git] / src / gallium / auxiliary / tgsi / tgsi_ureg.c
index 2b51672b8ea3e797d2522ef51786e93c7edf3d8c..5a28b89b58ea8c3201a2bab737e39d628e5e1ac7 100644 (file)
@@ -1,6 +1,6 @@
 /**************************************************************************
  * 
- * Copyright 2009 VMware, Inc.
+ * Copyright 2009-2010 VMware, Inc.
  * All Rights Reserved.
  * 
  * Permission is hereby granted, free of charge, to any person obtaining a
 #include "tgsi/tgsi_info.h"
 #include "tgsi/tgsi_dump.h"
 #include "tgsi/tgsi_sanity.h"
+#include "util/u_debug.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
+#include "util/u_bitmask.h"
 
 union tgsi_any_token {
    struct tgsi_header header;
    struct tgsi_processor processor;
    struct tgsi_token token;
+   struct tgsi_property prop;
+   struct tgsi_property_data prop_data;
    struct tgsi_declaration decl;
    struct tgsi_declaration_range decl_range;
+   struct tgsi_declaration_dimension decl_dim;
+   struct tgsi_declaration_interp decl_interp;
    struct tgsi_declaration_semantic decl_semantic;
+   struct tgsi_declaration_sampler_view decl_sampler_view;
+   struct tgsi_declaration_array array;
    struct tgsi_immediate imm;
    union  tgsi_immediate_data imm_data;
    struct tgsi_instruction insn;
    struct tgsi_instruction_predicate insn_predicate;
    struct tgsi_instruction_label insn_label;
    struct tgsi_instruction_texture insn_texture;
+   struct tgsi_texture_offset insn_texture_offset;
    struct tgsi_src_register src;
+   struct tgsi_ind_register ind;
    struct tgsi_dimension dim;
    struct tgsi_dst_register dst;
    unsigned value;
@@ -65,13 +75,20 @@ struct ureg_tokens {
 
 #define UREG_MAX_INPUT PIPE_MAX_ATTRIBS
 #define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS
-#define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS
+#define UREG_MAX_OUTPUT PIPE_MAX_SHADER_OUTPUTS
 #define UREG_MAX_CONSTANT_RANGE 32
-#define UREG_MAX_IMMEDIATE 32
-#define UREG_MAX_TEMP 256
+#define UREG_MAX_IMMEDIATE 256
 #define UREG_MAX_ADDR 2
-#define UREG_MAX_LOOP 1
 #define UREG_MAX_PRED 1
+#define UREG_MAX_ARRAY_TEMPS 256
+
+struct const_decl {
+   struct {
+      unsigned first;
+      unsigned last;
+   } constant_range[UREG_MAX_CONSTANT_RANGE];
+   unsigned nr_constant_ranges;
+};
 
 #define DOMAIN_DECL 0
 #define DOMAIN_INSN 1
@@ -85,6 +102,8 @@ struct ureg_program
       unsigned semantic_name;
       unsigned semantic_index;
       unsigned interp;
+      unsigned char cylindrical_wrap;
+      unsigned char centroid;
    } fs_input[UREG_MAX_INPUT];
    unsigned nr_fs_inputs;
 
@@ -92,6 +111,8 @@ struct ureg_program
 
    struct {
       unsigned index;
+      unsigned semantic_name;
+      unsigned semantic_index;
    } gs_input[UREG_MAX_INPUT];
    unsigned nr_gs_inputs;
 
@@ -105,6 +126,7 @@ struct ureg_program
    struct {
       unsigned semantic_name;
       unsigned semantic_index;
+      unsigned usage_mask; /* = TGSI_WRITEMASK_* */
    } output[UREG_MAX_OUTPUT];
    unsigned nr_outputs;
 
@@ -122,18 +144,37 @@ struct ureg_program
    struct ureg_src sampler[PIPE_MAX_SAMPLERS];
    unsigned nr_samplers;
 
-   unsigned temps_active[UREG_MAX_TEMP / 32];
+   struct {
+      unsigned index;
+      unsigned target;
+      unsigned return_type_x;
+      unsigned return_type_y;
+      unsigned return_type_z;
+      unsigned return_type_w;
+   } sampler_view[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   unsigned nr_sampler_views;
+
+   struct util_bitmask *free_temps;
+   struct util_bitmask *local_temps;
+   struct util_bitmask *decl_temps;
    unsigned nr_temps;
 
-   struct {
-      unsigned first;
-      unsigned last;
-   } constant_range[UREG_MAX_CONSTANT_RANGE];
-   unsigned nr_constant_ranges;
+   unsigned array_temps[UREG_MAX_ARRAY_TEMPS];
+   unsigned nr_array_temps;
+
+   struct const_decl const_decls;
+   struct const_decl const_decls2D[PIPE_MAX_CONSTANT_BUFFERS];
+
+   unsigned property_gs_input_prim;
+   unsigned property_gs_output_prim;
+   unsigned property_gs_max_vertices;
+   unsigned char property_fs_coord_origin; /* = TGSI_FS_COORD_ORIGIN_* */
+   unsigned char property_fs_coord_pixel_center; /* = TGSI_FS_COORD_PIXEL_CENTER_* */
+   unsigned char property_fs_color0_writes_all_cbufs; /* = TGSI_FS_COLOR0_WRITES_ALL_CBUFS * */
+   unsigned char property_fs_depth_layout; /* TGSI_FS_DEPTH_LAYOUT */
 
    unsigned nr_addrs;
    unsigned nr_preds;
-   unsigned nr_loops;
    unsigned nr_instructions;
 
    struct ureg_tokens domain[2];
@@ -227,61 +268,91 @@ ureg_dst_register( unsigned file,
    dst.PredSwizzleZ = TGSI_SWIZZLE_Z;
    dst.PredSwizzleW = TGSI_SWIZZLE_W;
    dst.Index     = index;
+   dst.ArrayID = 0;
 
    return dst;
 }
 
-static INLINE struct ureg_src 
-ureg_src_register( unsigned file,
-                   unsigned index )
+
+void
+ureg_property_gs_input_prim(struct ureg_program *ureg,
+                            unsigned input_prim)
 {
-   struct ureg_src src;
+   ureg->property_gs_input_prim = input_prim;
+}
 
-   src.File     = file;
-   src.SwizzleX = TGSI_SWIZZLE_X;
-   src.SwizzleY = TGSI_SWIZZLE_Y;
-   src.SwizzleZ = TGSI_SWIZZLE_Z;
-   src.SwizzleW = TGSI_SWIZZLE_W;
-   src.Pad      = 0;
-   src.Indirect = 0;
-   src.IndirectIndex = 0;
-   src.IndirectSwizzle = 0;
-   src.Absolute = 0;
-   src.Index    = index;
-   src.Negate   = 0;
+void
+ureg_property_gs_output_prim(struct ureg_program *ureg,
+                             unsigned output_prim)
+{
+   ureg->property_gs_output_prim = output_prim;
+}
 
-   return src;
+void
+ureg_property_gs_max_vertices(struct ureg_program *ureg,
+                              unsigned max_vertices)
+{
+   ureg->property_gs_max_vertices = max_vertices;
 }
 
+void
+ureg_property_fs_coord_origin(struct ureg_program *ureg,
+                            unsigned fs_coord_origin)
+{
+   ureg->property_fs_coord_origin = fs_coord_origin;
+}
+
+void
+ureg_property_fs_coord_pixel_center(struct ureg_program *ureg,
+                            unsigned fs_coord_pixel_center)
+{
+   ureg->property_fs_coord_pixel_center = fs_coord_pixel_center;
+}
 
+void
+ureg_property_fs_color0_writes_all_cbufs(struct ureg_program *ureg,
+                            unsigned fs_color0_writes_all_cbufs)
+{
+   ureg->property_fs_color0_writes_all_cbufs = fs_color0_writes_all_cbufs;
+}
 
+void
+ureg_property_fs_depth_layout(struct ureg_program *ureg,
+                              unsigned fs_depth_layout)
+{
+   ureg->property_fs_depth_layout = fs_depth_layout;
+}
 
-struct ureg_src 
-ureg_DECL_fs_input( struct ureg_program *ureg,
-                    unsigned name,
-                    unsigned index,
-                    unsigned interp_mode )
+struct ureg_src
+ureg_DECL_fs_input_cyl_centroid(struct ureg_program *ureg,
+                       unsigned semantic_name,
+                       unsigned semantic_index,
+                       unsigned interp_mode,
+                       unsigned cylindrical_wrap,
+                       unsigned centroid)
 {
    unsigned i;
 
    for (i = 0; i < ureg->nr_fs_inputs; i++) {
-      if (ureg->fs_input[i].semantic_name == name &&
-          ureg->fs_input[i].semantic_index == index) 
+      if (ureg->fs_input[i].semantic_name == semantic_name &&
+          ureg->fs_input[i].semantic_index == semantic_index) {
          goto out;
+      }
    }
 
    if (ureg->nr_fs_inputs < UREG_MAX_INPUT) {
-      ureg->fs_input[i].semantic_name = name;
-      ureg->fs_input[i].semantic_index = index;
+      ureg->fs_input[i].semantic_name = semantic_name;
+      ureg->fs_input[i].semantic_index = semantic_index;
       ureg->fs_input[i].interp = interp_mode;
+      ureg->fs_input[i].cylindrical_wrap = cylindrical_wrap;
+      ureg->fs_input[i].centroid = centroid;
       ureg->nr_fs_inputs++;
-   }
-   else {
-      set_bad( ureg );
+   } else {
+      set_bad(ureg);
    }
 
 out:
-   return ureg_src_register( TGSI_FILE_INPUT, i );
+   return ureg_src_register(TGSI_FILE_INPUT, i);
 }
 
 
@@ -298,10 +369,14 @@ ureg_DECL_vs_input( struct ureg_program *ureg,
 
 struct ureg_src
 ureg_DECL_gs_input(struct ureg_program *ureg,
-                   unsigned index)
+                   unsigned index,
+                   unsigned semantic_name,
+                   unsigned semantic_index)
 {
    if (ureg->nr_gs_inputs < UREG_MAX_INPUT) {
       ureg->gs_input[ureg->nr_gs_inputs].index = index;
+      ureg->gs_input[ureg->nr_gs_inputs].semantic_name = semantic_name;
+      ureg->gs_input[ureg->nr_gs_inputs].semantic_index = semantic_index;
       ureg->nr_gs_inputs++;
    } else {
       set_bad(ureg);
@@ -332,21 +407,27 @@ ureg_DECL_system_value(struct ureg_program *ureg,
 
 
 struct ureg_dst 
-ureg_DECL_output( struct ureg_program *ureg,
-                  unsigned name,
-                  unsigned index )
+ureg_DECL_output_masked( struct ureg_program *ureg,
+                         unsigned name,
+                         unsigned index,
+                         unsigned usage_mask )
 {
    unsigned i;
 
+   assert(usage_mask != 0);
+
    for (i = 0; i < ureg->nr_outputs; i++) {
       if (ureg->output[i].semantic_name == name &&
-          ureg->output[i].semantic_index == index) 
+          ureg->output[i].semantic_index == index) { 
+         ureg->output[i].usage_mask |= usage_mask;
          goto out;
+      }
    }
 
    if (ureg->nr_outputs < UREG_MAX_OUTPUT) {
       ureg->output[i].semantic_name = name;
       ureg->output[i].semantic_index = index;
+      ureg->output[i].usage_mask = usage_mask;
       ureg->nr_outputs++;
    }
    else {
@@ -358,104 +439,179 @@ out:
 }
 
 
+struct ureg_dst 
+ureg_DECL_output( struct ureg_program *ureg,
+                  unsigned name,
+                  unsigned index )
+{
+   return ureg_DECL_output_masked(ureg, name, index, TGSI_WRITEMASK_XYZW);
+}
+
+
 /* Returns a new constant register.  Keep track of which have been
  * referred to so that we can emit decls later.
  *
+ * Constant operands declared with this function must be addressed
+ * with a two-dimensional index.
+ *
  * There is nothing in this code to bind this constant to any tracked
  * value or manage any constant_buffer contents -- that's the
  * responsibility of the calling code.
  */
-struct ureg_src ureg_DECL_constant(struct ureg_program *ureg, 
-                                   unsigned index )
+void
+ureg_DECL_constant2D(struct ureg_program *ureg,
+                     unsigned first,
+                     unsigned last,
+                     unsigned index2D)
 {
+   struct const_decl *decl = &ureg->const_decls2D[index2D];
+
+   assert(index2D < PIPE_MAX_CONSTANT_BUFFERS);
+
+   if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) {
+      uint i = decl->nr_constant_ranges++;
+
+      decl->constant_range[i].first = first;
+      decl->constant_range[i].last = last;
+   }
+}
+
+
+/* A one-dimensional, deprecated version of ureg_DECL_constant2D().
+ *
+ * Constant operands declared with this function must be addressed
+ * with a one-dimensional index.
+ */
+struct ureg_src
+ureg_DECL_constant(struct ureg_program *ureg,
+                   unsigned index)
+{
+   struct const_decl *decl = &ureg->const_decls;
    unsigned minconst = index, maxconst = index;
    unsigned i;
 
    /* Inside existing range?
     */
-   for (i = 0; i < ureg->nr_constant_ranges; i++) {
-      if (ureg->constant_range[i].first <= index &&
-          ureg->constant_range[i].last >= index)
+   for (i = 0; i < decl->nr_constant_ranges; i++) {
+      if (decl->constant_range[i].first <= index &&
+          decl->constant_range[i].last >= index) {
          goto out;
+      }
    }
 
    /* Extend existing range?
     */
-   for (i = 0; i < ureg->nr_constant_ranges; i++) {
-      if (ureg->constant_range[i].last == index - 1) {
-         ureg->constant_range[i].last = index;
+   for (i = 0; i < decl->nr_constant_ranges; i++) {
+      if (decl->constant_range[i].last == index - 1) {
+         decl->constant_range[i].last = index;
          goto out;
       }
 
-      if (ureg->constant_range[i].first == index + 1) {
-         ureg->constant_range[i].first = index;
+      if (decl->constant_range[i].first == index + 1) {
+         decl->constant_range[i].first = index;
          goto out;
       }
 
-      minconst = MIN2(minconst, ureg->constant_range[i].first);
-      maxconst = MAX2(maxconst, ureg->constant_range[i].last);
+      minconst = MIN2(minconst, decl->constant_range[i].first);
+      maxconst = MAX2(maxconst, decl->constant_range[i].last);
    }
 
    /* Create new range?
     */
-   if (ureg->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) {
-      i = ureg->nr_constant_ranges++;
-      ureg->constant_range[i].first = index;
-      ureg->constant_range[i].last = index;
+   if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) {
+      i = decl->nr_constant_ranges++;
+      decl->constant_range[i].first = index;
+      decl->constant_range[i].last = index;
       goto out;
    }
 
    /* Collapse all ranges down to one:
     */
    i = 0;
-   ureg->constant_range[0].first = minconst;
-   ureg->constant_range[0].last = maxconst;
-   ureg->nr_constant_ranges = 1;
+   decl->constant_range[0].first = minconst;
+   decl->constant_range[0].last = maxconst;
+   decl->nr_constant_ranges = 1;
 
 out:
-   assert(i < ureg->nr_constant_ranges);
-   assert(ureg->constant_range[i].first <= index);
-   assert(ureg->constant_range[i].last >= index);
-   return ureg_src_register( TGSI_FILE_CONSTANT, index );
+   assert(i < decl->nr_constant_ranges);
+   assert(decl->constant_range[i].first <= index);
+   assert(decl->constant_range[i].last >= index);
+   return ureg_src_register(TGSI_FILE_CONSTANT, index);
 }
 
-
-/* Allocate a new temporary.  Temporaries greater than UREG_MAX_TEMP
- * are legal, but will not be released.
- */
-struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg )
+static struct ureg_dst alloc_temporary( struct ureg_program *ureg,
+                                        boolean local )
 {
    unsigned i;
 
-   for (i = 0; i < UREG_MAX_TEMP; i += 32) {
-      int bit = ffs(~ureg->temps_active[i/32]);
-      if (bit != 0) {
-         i += bit - 1;
-         goto out;
-      }
+   /* Look for a released temporary.
+    */
+   for (i = util_bitmask_get_first_index(ureg->free_temps);
+        i != UTIL_BITMASK_INVALID_INDEX;
+        i = util_bitmask_get_next_index(ureg->free_temps, i + 1)) {
+      if (util_bitmask_get(ureg->local_temps, i) == local)
+         break;
    }
 
-   /* No reusable temps, so allocate a new one:
+   /* Or allocate a new one.
     */
-   i = ureg->nr_temps++;
+   if (i == UTIL_BITMASK_INVALID_INDEX) {
+      i = ureg->nr_temps++;
 
-out:
-   if (i < UREG_MAX_TEMP)
-      ureg->temps_active[i/32] |= 1 << (i % 32);
+      if (local)
+         util_bitmask_set(ureg->local_temps, i);
+
+      /* Start a new declaration when the local flag changes */
+      if (!i || util_bitmask_get(ureg->local_temps, i - 1) != local)
+         util_bitmask_set(ureg->decl_temps, i);
+   }
 
-   if (i >= ureg->nr_temps)
-      ureg->nr_temps = i + 1;
+   util_bitmask_clear(ureg->free_temps, i);
 
    return ureg_dst_register( TGSI_FILE_TEMPORARY, i );
 }
 
+struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg )
+{
+   return alloc_temporary(ureg, FALSE);
+}
+
+struct ureg_dst ureg_DECL_local_temporary( struct ureg_program *ureg )
+{
+   return alloc_temporary(ureg, TRUE);
+}
+
+struct ureg_dst ureg_DECL_array_temporary( struct ureg_program *ureg,
+                                           unsigned size,
+                                           boolean local )
+{
+   unsigned i = ureg->nr_temps;
+   struct ureg_dst dst = ureg_dst_register( TGSI_FILE_TEMPORARY, i );
+
+   if (local)
+      util_bitmask_set(ureg->local_temps, i);
+
+   /* Always start a new declaration at the start */
+   util_bitmask_set(ureg->decl_temps, i);
+
+   ureg->nr_temps += size;
+
+   /* and also at the end of the array */
+   util_bitmask_set(ureg->decl_temps, ureg->nr_temps);
+
+   if (ureg->nr_array_temps < UREG_MAX_ARRAY_TEMPS) {
+      ureg->array_temps[ureg->nr_array_temps++] = i;
+      dst.ArrayID = ureg->nr_array_temps;
+   }
+
+   return dst;
+}
 
 void ureg_release_temporary( struct ureg_program *ureg,
                              struct ureg_dst tmp )
 {
    if(tmp.File == TGSI_FILE_TEMPORARY)
-      if (tmp.Index < UREG_MAX_TEMP)
-         ureg->temps_active[tmp.Index/32] &= ~(1 << (tmp.Index % 32));
+      util_bitmask_set(ureg->free_temps, tmp.Index);
 }
 
 
@@ -470,19 +626,6 @@ struct ureg_dst ureg_DECL_address( struct ureg_program *ureg )
    return ureg_dst_register( TGSI_FILE_ADDRESS, 0 );
 }
 
-/* Allocate a new loop register.
- */
-struct ureg_dst
-ureg_DECL_loop(struct ureg_program *ureg)
-{
-   if (ureg->nr_loops < UREG_MAX_LOOP) {
-      return ureg_dst_register(TGSI_FILE_LOOP, ureg->nr_loops++);
-   }
-
-   assert(0);
-   return ureg_dst_register(TGSI_FILE_LOOP, 0);
-}
-
 /* Allocate a new predicate register.
  */
 struct ureg_dst
@@ -517,6 +660,41 @@ struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg,
    return ureg->sampler[0];
 }
 
+/*
+ * Allocate a new shader sampler view.
+ */
+struct ureg_src
+ureg_DECL_sampler_view(struct ureg_program *ureg,
+                       unsigned index,
+                       unsigned target,
+                       unsigned return_type_x,
+                       unsigned return_type_y,
+                       unsigned return_type_z,
+                       unsigned return_type_w)
+{
+   struct ureg_src reg = ureg_src_register(TGSI_FILE_SAMPLER_VIEW, index);
+   uint i;
+
+   for (i = 0; i < ureg->nr_sampler_views; i++) {
+      if (ureg->sampler_view[i].index == index) {
+         return reg;
+      }
+   }
+
+   if (i < PIPE_MAX_SHADER_SAMPLER_VIEWS) {
+      ureg->sampler_view[i].index = index;
+      ureg->sampler_view[i].target = target;
+      ureg->sampler_view[i].return_type_x = return_type_x;
+      ureg->sampler_view[i].return_type_y = return_type_y;
+      ureg->sampler_view[i].return_type_z = return_type_z;
+      ureg->sampler_view[i].return_type_w = return_type_w;
+      ureg->nr_sampler_views++;
+      return reg;
+   }
+
+   assert(0);
+   return reg;
+}
 
 static int
 match_or_expand_immediate( const unsigned *v,
@@ -565,7 +743,7 @@ decl_immediate( struct ureg_program *ureg,
                 unsigned type )
 {
    unsigned i, j;
-   unsigned swizzle;
+   unsigned swizzle = 0;
 
    /* Could do a first pass where we examine all existing immediates
     * without expanding.
@@ -642,6 +820,35 @@ ureg_DECL_immediate_uint( struct ureg_program *ureg,
 }
 
 
+struct ureg_src
+ureg_DECL_immediate_block_uint( struct ureg_program *ureg,
+                                const unsigned *v,
+                                unsigned nr )
+{
+   uint index;
+   uint i;
+
+   if (ureg->nr_immediates + (nr + 3) / 4 > UREG_MAX_IMMEDIATE) {
+      set_bad(ureg);
+      return ureg_src_register(TGSI_FILE_IMMEDIATE, 0);
+   }
+
+   index = ureg->nr_immediates;
+   ureg->nr_immediates += (nr + 3) / 4;
+
+   for (i = index; i < ureg->nr_immediates; i++) {
+      ureg->immediate[i].type = TGSI_IMM_UINT32;
+      ureg->immediate[i].nr = nr > 4 ? 4 : nr;
+      memcpy(ureg->immediate[i].value.u,
+             &v[(i - index) * 4],
+             ureg->immediate[i].nr * sizeof(uint));
+      nr -= 4;
+   }
+
+   return ureg_src_register(TGSI_FILE_IMMEDIATE, index);
+}
+
+
 struct ureg_src
 ureg_DECL_immediate_int( struct ureg_program *ureg,
                          const int *v,
@@ -651,17 +858,17 @@ ureg_DECL_immediate_int( struct ureg_program *ureg,
 }
 
 
-void 
+void
 ureg_emit_src( struct ureg_program *ureg,
                struct ureg_src src )
 {
-   unsigned size = 1 + (src.Indirect ? 1 : 0);
+   unsigned size = 1 + (src.Indirect ? 1 : 0) +
+                   (src.Dimension ? (src.DimIndirect ? 2 : 1) : 0);
 
    union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
    unsigned n = 0;
 
    assert(src.File != TGSI_FILE_NULL);
-   assert(src.File != TGSI_FILE_OUTPUT);
    assert(src.File < TGSI_FILE_COUNT);
    
    out[n].value = 0;
@@ -678,12 +885,30 @@ ureg_emit_src( struct ureg_program *ureg,
    if (src.Indirect) {
       out[0].src.Indirect = 1;
       out[n].value = 0;
-      out[n].src.File = TGSI_FILE_ADDRESS;
-      out[n].src.SwizzleX = src.IndirectSwizzle;
-      out[n].src.SwizzleY = src.IndirectSwizzle;
-      out[n].src.SwizzleZ = src.IndirectSwizzle;
-      out[n].src.SwizzleW = src.IndirectSwizzle;
-      out[n].src.Index = src.IndirectIndex;
+      out[n].ind.File = src.IndirectFile;
+      out[n].ind.Swizzle = src.IndirectSwizzle;
+      out[n].ind.Index = src.IndirectIndex;
+      out[n].ind.ArrayID = src.ArrayID;
+      n++;
+   }
+
+   if (src.Dimension) {
+      out[0].src.Dimension = 1;
+      out[n].dim.Dimension = 0;
+      out[n].dim.Padding = 0;
+      if (src.DimIndirect) {
+         out[n].dim.Indirect = 1;
+         out[n].dim.Index = src.DimensionIndex;
+         n++;
+         out[n].value = 0;
+         out[n].ind.File = src.DimIndFile;
+         out[n].ind.Swizzle = src.DimIndSwizzle;
+         out[n].ind.Index = src.DimIndIndex;
+         out[n].ind.ArrayID = src.ArrayID;
+      } else {
+         out[n].dim.Indirect = 0;
+         out[n].dim.Index = src.DimensionIndex;
+      }
       n++;
    }
 
@@ -705,9 +930,10 @@ ureg_emit_dst( struct ureg_program *ureg,
    assert(dst.File != TGSI_FILE_CONSTANT);
    assert(dst.File != TGSI_FILE_INPUT);
    assert(dst.File != TGSI_FILE_SAMPLER);
+   assert(dst.File != TGSI_FILE_SAMPLER_VIEW);
    assert(dst.File != TGSI_FILE_IMMEDIATE);
    assert(dst.File < TGSI_FILE_COUNT);
-   
+
    out[n].value = 0;
    out[n].dst.File = dst.File;
    out[n].dst.WriteMask = dst.WriteMask;
@@ -717,12 +943,10 @@ ureg_emit_dst( struct ureg_program *ureg,
    
    if (dst.Indirect) {
       out[n].value = 0;
-      out[n].src.File = TGSI_FILE_ADDRESS;
-      out[n].src.SwizzleX = dst.IndirectSwizzle;
-      out[n].src.SwizzleY = dst.IndirectSwizzle;
-      out[n].src.SwizzleZ = dst.IndirectSwizzle;
-      out[n].src.SwizzleW = dst.IndirectSwizzle;
-      out[n].src.Index = dst.IndirectIndex;
+      out[n].ind.File = TGSI_FILE_ADDRESS;
+      out[n].ind.Swizzle = dst.IndirectSwizzle;
+      out[n].ind.Index = dst.IndirectIndex;
+      out[n].ind.ArrayID = dst.ArrayID;
       n++;
    }
 
@@ -834,7 +1058,7 @@ ureg_fixup_label(struct ureg_program *ureg,
 void
 ureg_emit_texture(struct ureg_program *ureg,
                   unsigned extended_token,
-                  unsigned target )
+                  unsigned target, unsigned num_offsets)
 {
    union tgsi_any_token *out, *insn;
 
@@ -845,6 +1069,20 @@ ureg_emit_texture(struct ureg_program *ureg,
 
    out[0].value = 0;
    out[0].insn_texture.Texture = target;
+   out[0].insn_texture.NumOffsets = num_offsets;
+}
+
+void
+ureg_emit_texture_offset(struct ureg_program *ureg,
+                         const struct tgsi_texture_offset *offset)
+{
+   union tgsi_any_token *out;
+
+   out = get_tokens( ureg, DOMAIN_INSN, 1);
+
+   out[0].value = 0;
+   out[0].insn_texture_offset = *offset;
+   
 }
 
 
@@ -911,6 +1149,8 @@ ureg_tex_insn(struct ureg_program *ureg,
               const struct ureg_dst *dst,
               unsigned nr_dst,
               unsigned target,
+              const struct tgsi_texture_offset *texoffsets,
+              unsigned nr_offset,
               const struct ureg_src *src,
               unsigned nr_src )
 {
@@ -943,7 +1183,10 @@ ureg_tex_insn(struct ureg_program *ureg,
                          nr_dst,
                          nr_src);
 
-   ureg_emit_texture( ureg, insn.extended_token, target );
+   ureg_emit_texture( ureg, insn.extended_token, target, nr_offset );
+
+   for (i = 0; i < nr_offset; i++)
+      ureg_emit_texture_offset( ureg, &texoffsets[i]);
 
    for (i = 0; i < nr_dst; i++)
       ureg_emit_dst( ureg, dst[i] );
@@ -986,34 +1229,93 @@ ureg_label_insn(struct ureg_program *ureg,
 }
 
 
-
-static void emit_decl( struct ureg_program *ureg,
-                       unsigned file,
-                       unsigned index,
-                       unsigned semantic_name,
-                       unsigned semantic_index,
-                       unsigned interp )
+static void
+emit_decl_semantic(struct ureg_program *ureg,
+                   unsigned file,
+                   unsigned index,
+                   unsigned semantic_name,
+                   unsigned semantic_index,
+                   unsigned usage_mask)
 {
-   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 3 );
+   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
 
    out[0].value = 0;
    out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
    out[0].decl.NrTokens = 3;
    out[0].decl.File = file;
-   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */
-   out[0].decl.Interpolate = interp;
+   out[0].decl.UsageMask = usage_mask;
    out[0].decl.Semantic = 1;
 
    out[1].value = 0;
-   out[1].decl_range.First = 
-      out[1].decl_range.Last = index;
+   out[1].decl_range.First = index;
+   out[1].decl_range.Last = index;
 
    out[2].value = 0;
    out[2].decl_semantic.Name = semantic_name;
    out[2].decl_semantic.Index = semantic_index;
+}
+
+
+static void
+emit_decl_fs(struct ureg_program *ureg,
+             unsigned file,
+             unsigned index,
+             unsigned semantic_name,
+             unsigned semantic_index,
+             unsigned interpolate,
+             unsigned cylindrical_wrap,
+             unsigned centroid)
+{
+   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 4);
+
+   out[0].value = 0;
+   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
+   out[0].decl.NrTokens = 4;
+   out[0].decl.File = file;
+   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */
+   out[0].decl.Interpolate = 1;
+   out[0].decl.Semantic = 1;
+
+   out[1].value = 0;
+   out[1].decl_range.First = index;
+   out[1].decl_range.Last = index;
 
+   out[2].value = 0;
+   out[2].decl_interp.Interpolate = interpolate;
+   out[2].decl_interp.CylindricalWrap = cylindrical_wrap;
+   out[2].decl_interp.Centroid = centroid;
+
+   out[3].value = 0;
+   out[3].decl_semantic.Name = semantic_name;
+   out[3].decl_semantic.Index = semantic_index;
 }
 
+static void
+emit_decl_temps( struct ureg_program *ureg,
+                 unsigned first, unsigned last,
+                 boolean local,
+                 unsigned arrayid )
+{
+   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL,
+                                           arrayid ? 3 : 2 );
+
+   out[0].value = 0;
+   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
+   out[0].decl.NrTokens = 2;
+   out[0].decl.File = TGSI_FILE_TEMPORARY;
+   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
+   out[0].decl.Local = local;
+
+   out[1].value = 0;
+   out[1].decl_range.First = first;
+   out[1].decl_range.Last = last;
+
+   if (arrayid) {
+      out[0].decl.Array = 1;
+      out[2].value = 0;
+      out[2].array.ArrayID = arrayid;
+   }
+}
 
 static void emit_decl_range( struct ureg_program *ureg,
                              unsigned file,
@@ -1026,8 +1328,7 @@ static void emit_decl_range( struct ureg_program *ureg,
    out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
    out[0].decl.NrTokens = 2;
    out[0].decl.File = file;
-   out[0].decl.UsageMask = 0xf;
-   out[0].decl.Interpolate = TGSI_INTERPOLATE_CONSTANT;
+   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
    out[0].decl.Semantic = 0;
 
    out[1].value = 0;
@@ -1035,6 +1336,59 @@ static void emit_decl_range( struct ureg_program *ureg,
    out[1].decl_range.Last = first + count - 1;
 }
 
+static void
+emit_decl_range2D(struct ureg_program *ureg,
+                  unsigned file,
+                  unsigned first,
+                  unsigned last,
+                  unsigned index2D)
+{
+   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
+
+   out[0].value = 0;
+   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
+   out[0].decl.NrTokens = 3;
+   out[0].decl.File = file;
+   out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
+   out[0].decl.Dimension = 1;
+
+   out[1].value = 0;
+   out[1].decl_range.First = first;
+   out[1].decl_range.Last = last;
+
+   out[2].value = 0;
+   out[2].decl_dim.Index2D = index2D;
+}
+
+static void
+emit_decl_sampler_view(struct ureg_program *ureg,
+                       unsigned index,
+                       unsigned target,
+                       unsigned return_type_x,
+                       unsigned return_type_y,
+                       unsigned return_type_z,
+                       unsigned return_type_w )
+{
+   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
+
+   out[0].value = 0;
+   out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
+   out[0].decl.NrTokens = 3;
+   out[0].decl.File = TGSI_FILE_SAMPLER_VIEW;
+   out[0].decl.UsageMask = 0xf;
+
+   out[1].value = 0;
+   out[1].decl_range.First = index;
+   out[1].decl_range.Last = index;
+
+   out[2].value = 0;
+   out[2].decl_sampler_view.Resource    = target;
+   out[2].decl_sampler_view.ReturnTypeX = return_type_x;
+   out[2].decl_sampler_view.ReturnTypeY = return_type_y;
+   out[2].decl_sampler_view.ReturnTypeZ = return_type_z;
+   out[2].decl_sampler_view.ReturnTypeW = return_type_w;
+}
+
 static void
 emit_immediate( struct ureg_program *ureg,
                 const unsigned *v,
@@ -1054,13 +1408,82 @@ emit_immediate( struct ureg_program *ureg,
    out[4].imm_data.Uint = v[3];
 }
 
+static void
+emit_property(struct ureg_program *ureg,
+              unsigned name,
+              unsigned data)
+{
+   union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
+
+   out[0].value = 0;
+   out[0].prop.Type = TGSI_TOKEN_TYPE_PROPERTY;
+   out[0].prop.NrTokens = 2;
+   out[0].prop.PropertyName = name;
 
+   out[1].prop_data.Data = data;
+}
 
 
 static void emit_decls( struct ureg_program *ureg )
 {
    unsigned i;
 
+   if (ureg->property_gs_input_prim != ~0) {
+      assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY);
+
+      emit_property(ureg,
+                    TGSI_PROPERTY_GS_INPUT_PRIM,
+                    ureg->property_gs_input_prim);
+   }
+
+   if (ureg->property_gs_output_prim != ~0) {
+      assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY);
+
+      emit_property(ureg,
+                    TGSI_PROPERTY_GS_OUTPUT_PRIM,
+                    ureg->property_gs_output_prim);
+   }
+
+   if (ureg->property_gs_max_vertices != ~0) {
+      assert(ureg->processor == TGSI_PROCESSOR_GEOMETRY);
+
+      emit_property(ureg,
+                    TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
+                    ureg->property_gs_max_vertices);
+   }
+
+   if (ureg->property_fs_coord_origin) {
+      assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT);
+
+      emit_property(ureg,
+                    TGSI_PROPERTY_FS_COORD_ORIGIN,
+                    ureg->property_fs_coord_origin);
+   }
+
+   if (ureg->property_fs_coord_pixel_center) {
+      assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT);
+
+      emit_property(ureg,
+                    TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
+                    ureg->property_fs_coord_pixel_center);
+   }
+
+   if (ureg->property_fs_color0_writes_all_cbufs) {
+      assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT);
+
+      emit_property(ureg,
+                    TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS,
+                    ureg->property_fs_color0_writes_all_cbufs);
+   }
+
+   if (ureg->property_fs_depth_layout) {
+      assert(ureg->processor == TGSI_PROCESSOR_FRAGMENT);
+
+      emit_property(ureg,
+                    TGSI_PROPERTY_FS_DEPTH_LAYOUT,
+                    ureg->property_fs_depth_layout);
+   }
+
    if (ureg->processor == TGSI_PROCESSOR_VERTEX) {
       for (i = 0; i < UREG_MAX_INPUT; i++) {
          if (ureg->vs_inputs[i/32] & (1 << (i%32))) {
@@ -1069,38 +1492,42 @@ static void emit_decls( struct ureg_program *ureg )
       }
    } else if (ureg->processor == TGSI_PROCESSOR_FRAGMENT) {
       for (i = 0; i < ureg->nr_fs_inputs; i++) {
-         emit_decl( ureg, 
-                    TGSI_FILE_INPUT, 
-                    i,
-                    ureg->fs_input[i].semantic_name,
-                    ureg->fs_input[i].semantic_index,
-                    ureg->fs_input[i].interp );
+         emit_decl_fs(ureg,
+                      TGSI_FILE_INPUT,
+                      i,
+                      ureg->fs_input[i].semantic_name,
+                      ureg->fs_input[i].semantic_index,
+                      ureg->fs_input[i].interp,
+                      ureg->fs_input[i].cylindrical_wrap,
+                      ureg->fs_input[i].centroid);
       }
    } else {
       for (i = 0; i < ureg->nr_gs_inputs; i++) {
-         emit_decl_range(ureg, 
-                         TGSI_FILE_INPUT, 
-                         ureg->gs_input[i].index,
-                         1);
+         emit_decl_semantic(ureg,
+                            TGSI_FILE_INPUT,
+                            ureg->gs_input[i].index,
+                            ureg->gs_input[i].semantic_name,
+                            ureg->gs_input[i].semantic_index,
+                            TGSI_WRITEMASK_XYZW);
       }
    }
 
    for (i = 0; i < ureg->nr_system_values; i++) {
-      emit_decl(ureg,
-                TGSI_FILE_SYSTEM_VALUE,
-                ureg->system_value[i].index,
-                ureg->system_value[i].semantic_name,
-                ureg->system_value[i].semantic_index,
-                TGSI_INTERPOLATE_CONSTANT);
+      emit_decl_semantic(ureg,
+                         TGSI_FILE_SYSTEM_VALUE,
+                         ureg->system_value[i].index,
+                         ureg->system_value[i].semantic_name,
+                         ureg->system_value[i].semantic_index,
+                         TGSI_WRITEMASK_XYZW);
    }
 
    for (i = 0; i < ureg->nr_outputs; i++) {
-      emit_decl( ureg, 
-                 TGSI_FILE_OUTPUT, 
-                 i,
-                 ureg->output[i].semantic_name,
-                 ureg->output[i].semantic_index,
-                 TGSI_INTERPOLATE_CONSTANT );
+      emit_decl_semantic(ureg,
+                         TGSI_FILE_OUTPUT,
+                         i,
+                         ureg->output[i].semantic_name,
+                         ureg->output[i].semantic_index,
+                         ureg->output[i].usage_mask);
    }
 
    for (i = 0; i < ureg->nr_samplers; i++) {
@@ -1109,19 +1536,55 @@ static void emit_decls( struct ureg_program *ureg )
                        ureg->sampler[i].Index, 1 );
    }
 
-   if (ureg->nr_constant_ranges) {
-      for (i = 0; i < ureg->nr_constant_ranges; i++)
-         emit_decl_range( ureg,
-                          TGSI_FILE_CONSTANT,
-                          ureg->constant_range[i].first, 
-                          (ureg->constant_range[i].last + 1 -
-                           ureg->constant_range[i].first) );
+   for (i = 0; i < ureg->nr_sampler_views; i++) {
+      emit_decl_sampler_view(ureg,
+                             ureg->sampler_view[i].index,
+                             ureg->sampler_view[i].target,
+                             ureg->sampler_view[i].return_type_x,
+                             ureg->sampler_view[i].return_type_y,
+                             ureg->sampler_view[i].return_type_z,
+                             ureg->sampler_view[i].return_type_w);
+   }
+
+   if (ureg->const_decls.nr_constant_ranges) {
+      for (i = 0; i < ureg->const_decls.nr_constant_ranges; i++) {
+         emit_decl_range(ureg,
+                         TGSI_FILE_CONSTANT,
+                         ureg->const_decls.constant_range[i].first,
+                         ureg->const_decls.constant_range[i].last - ureg->const_decls.constant_range[i].first + 1);
+      }
+   }
+
+   for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
+      struct const_decl *decl = &ureg->const_decls2D[i];
+
+      if (decl->nr_constant_ranges) {
+         uint j;
+
+         for (j = 0; j < decl->nr_constant_ranges; j++) {
+            emit_decl_range2D(ureg,
+                              TGSI_FILE_CONSTANT,
+                              decl->constant_range[j].first,
+                              decl->constant_range[j].last,
+                              i);
+         }
+      }
    }
 
    if (ureg->nr_temps) {
-      emit_decl_range( ureg,
-                       TGSI_FILE_TEMPORARY,
-                       0, ureg->nr_temps );
+      unsigned array = 0;
+      for (i = 0; i < ureg->nr_temps;) {
+         boolean local = util_bitmask_get(ureg->local_temps, i);
+         unsigned first = i;
+         i = util_bitmask_get_next_index(ureg->decl_temps, i + 1);
+         if (i == UTIL_BITMASK_INVALID_INDEX)
+            i = ureg->nr_temps;
+
+         if (array < ureg->nr_array_temps && ureg->array_temps[array] == first)
+            emit_decl_temps( ureg, first, i - 1, local, ++array );
+         else
+            emit_decl_temps( ureg, first, i - 1, local, 0 );
+      }
    }
 
    if (ureg->nr_addrs) {
@@ -1130,13 +1593,6 @@ static void emit_decls( struct ureg_program *ureg )
                        0, ureg->nr_addrs );
    }
 
-   if (ureg->nr_loops) {
-      emit_decl_range(ureg,
-                      TGSI_FILE_LOOP,
-                      0,
-                      ureg->nr_loops);
-   }
-
    if (ureg->nr_preds) {
       emit_decl_range(ureg,
                       TGSI_FILE_PREDICATE,
@@ -1227,7 +1683,8 @@ const struct tgsi_token *ureg_finalize( struct ureg_program *ureg )
 
 
 void *ureg_create_shader( struct ureg_program *ureg,
-                          struct pipe_context *pipe )
+                          struct pipe_context *pipe,
+                          const struct pipe_stream_output_info *so )
 {
    struct pipe_shader_state state;
 
@@ -1235,6 +1692,11 @@ void *ureg_create_shader( struct ureg_program *ureg,
    if(!state.tokens)
       return NULL;
 
+   if (so)
+      state.stream_output = *so;
+   else
+      memset(&state.stream_output, 0, sizeof(state.stream_output));
+
    if (ureg->processor == TGSI_PROCESSOR_VERTEX)
       return pipe->create_vs_state( pipe, &state );
    else
@@ -1263,14 +1725,54 @@ const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg,
 }
 
 
+/**
+ * Release a token array by handing it to FREE().
+ *
+ * The const qualifier is cast away only so the pointer can be passed to
+ * FREE(); the tokens are not otherwise touched here.
+ *
+ * NOTE(review): presumably meant for arrays returned by ureg_get_tokens()
+ * -- confirm against callers.
+ */
+void ureg_free_tokens( const struct tgsi_token *tokens )
+{
+   FREE((struct tgsi_token *)tokens);
+}
+
+
 struct ureg_program *ureg_create( unsigned processor )
 {
+   /* Build a new ureg_program for the given processor type.
+    *
+    * Returns the new program, or NULL if the struct or any of the three
+    * temporary-register bitmasks could not be allocated.  On failure
+    * everything allocated so far is released before returning.
+    *
+    * NOTE(review): fields not assigned below rely on CALLOC_STRUCT
+    * zero-filling the struct -- presumably it does; confirm.
+    */
    struct ureg_program *ureg = CALLOC_STRUCT( ureg_program );
    if (ureg == NULL)
-      return NULL;
+      goto no_ureg;
 
    ureg->processor = processor;
+   /* ~0 marks a geometry-shader property as "not set"; emit_decls() only
+    * emits these properties when they differ from ~0.
+    */
+   ureg->property_gs_input_prim = ~0;
+   ureg->property_gs_output_prim = ~0;
+   ureg->property_gs_max_vertices = ~0;
+
+   /* Bitmasks tracking temporaries; each creation can fail independently. */
+   ureg->free_temps = util_bitmask_create();
+   if (ureg->free_temps == NULL)
+      goto no_free_temps;
+
+   ureg->local_temps = util_bitmask_create();
+   if (ureg->local_temps == NULL)
+      goto no_local_temps;
+
+   ureg->decl_temps = util_bitmask_create();
+   if (ureg->decl_temps == NULL)
+      goto no_decl_temps;
+
    return ureg;
+
+   /* Error unwinding: labels fall through, releasing resources in the
+    * reverse order of their creation.
+    */
+no_decl_temps:
+   util_bitmask_destroy(ureg->local_temps);
+no_local_temps:
+   util_bitmask_destroy(ureg->free_temps);
+no_free_temps:
+   FREE(ureg);
+no_ureg:
+   return NULL;
+}
+
+
+/**
+ * Return the number of outputs recorded in the program (ureg->nr_outputs).
+ *
+ * A NULL ureg is tolerated and yields 0.
+ *
+ * NOTE(review): the const qualifier on a by-value return type has no
+ * effect and may trigger compiler warnings; dropping it would need the
+ * matching prototype (not visible here) changed at the same time.
+ */
+const unsigned
+ureg_get_nr_outputs( const struct ureg_program *ureg )
+{
+   /* Guard against callers passing no program at all. */
+   if (!ureg)
+      return 0;
+   return ureg->nr_outputs;
 }
 
 
@@ -1283,6 +1785,10 @@ void ureg_destroy( struct ureg_program *ureg )
           ureg->domain[i].tokens != error_tokens)
          FREE(ureg->domain[i].tokens);
    }
-   
+
+   util_bitmask_destroy(ureg->free_temps);
+   util_bitmask_destroy(ureg->local_temps);
+   util_bitmask_destroy(ureg->decl_temps);
+
    FREE(ureg);
 }