gallium: implement ARB_conservative_depth

[mesa.git] / src / mesa / state_tracker / st_glsl_to_tgsi.cpp
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp

index 2885630c7db36479cd01618a039d5b19f37d08d6..cd4db255f54dff7877269e78e3403cd0ecfad94a 100644 (file)
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -42,17 +42,17 @@
  #include "ir_optimization.h"
  #include "ast.h"
  
-extern "C" {
  #include "main/mtypes.h"
-#include "main/shaderapi.h"
  #include "main/shaderobj.h"
-#include "main/uniforms.h"
  #include "program/hash_table.h"
+
+extern "C" {
+#include "main/shaderapi.h"
+#include "main/uniforms.h"
  #include "program/prog_instruction.h"
  #include "program/prog_optimize.h"
  #include "program/prog_print.h"
  #include "program/program.h"
-#include "program/prog_uniform.h"
  #include "program/prog_parameter.h"
  #include "program/sampler.h"
  
@@ -78,8 +78,17 @@ extern "C" {
                             (1 << PROGRAM_CONSTANT) |     \
                             (1 << PROGRAM_UNIFORM))
  
+/**
+ * Maximum number of temporary registers.
+ *
+ * It is too big for stack allocated arrays -- it will cause stack overflow on
+ * Windows and likely Mac OS X.
+ */
  #define MAX_TEMPS         4096
  
+/* will be 4 for GLSL 4.00 */
+#define MAX_GLSL_TEXTURE_OFFSET 1
+
  class st_src_reg;
  class st_dst_reg;
  
@@ -211,6 +220,8 @@ public:
     int sampler; /**< sampler index */
     int tex_target; /**< One of TEXTURE_*_INDEX */
     GLboolean tex_shadow;
+   struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
+   unsigned tex_offset_num_offset;
     int dead_mask; /**< Used in dead code elimination */
  
     class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
@@ -385,14 +396,18 @@ public:
     void emit_scalar(ir_instruction *ir, unsigned op,
                     st_dst_reg dst, st_src_reg src0, st_src_reg src1);
  
+   void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst);
+
     void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
  
     void emit_scs(ir_instruction *ir, unsigned op,
                  st_dst_reg dst, const st_src_reg &src);
  
-   GLboolean try_emit_mad(ir_expression *ir,
-                         int mul_operand);
-   GLboolean try_emit_sat(ir_expression *ir);
+   bool try_emit_mad(ir_expression *ir,
+              int mul_operand);
+   bool try_emit_mad_for_and_not(ir_expression *ir,
+              int mul_operand);
+   bool try_emit_sat(ir_expression *ir);
  
     void emit_swz(ir_expression *ir);
  
@@ -510,7 +525,7 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
  
     inst->function = NULL;
     
-   if (op == TGSI_OPCODE_ARL)
+   if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL)
        this->num_address_regs = 1;
     
     /* Update indirect addressing status used by TGSI */
@@ -560,7 +575,10 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
     }
  
     this->instructions.push_tail(inst);
-   
+
+   if (native_integers)
+      try_emit_float_set(ir, op, dst);
+
     return inst;
  }
  
@@ -586,11 +604,28 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
     return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
  }
  
+ /**
+ * Emits the code to convert the result of float SET instructions to integers.
+ */
+void
+glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op,
+                        st_dst_reg dst)
+{
+   if ((op == TGSI_OPCODE_SEQ || /* only these four SET opcodes get a conversion */
+        op == TGSI_OPCODE_SNE ||
+        op == TGSI_OPCODE_SGE ||
+        op == TGSI_OPCODE_SLT))
+   {
+      st_src_reg src = st_src_reg(dst); /* source is the same register that dst names */
+      src.negate = ~src.negate; /* presumably turns a float "true" into -1 so F2I yields ~0 -- TODO confirm */
+      dst.type = GLSL_TYPE_FLOAT; /* NOTE(review): F2I result is tagged FLOAT here -- confirm intent */
+      emit(ir, TGSI_OPCODE_F2I, dst, src); /* in-place conversion; F2I is not in the opcode list above */
+   }
+}
+
  /**
   * Determines whether to use an integer, unsigned integer, or float opcode 
   * based on the operands and input opcode, then emits the result.
- * 
- * TODO: type checking for remaining TGSI opcodes
   */
  unsigned
  glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
@@ -602,7 +637,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
     if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
        type = GLSL_TYPE_FLOAT;
     else if (native_integers)
-      type = src0.type;
+      type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
  
  #define case4(c, f, i, u) \
     case TGSI_OPCODE_##c: \
@@ -628,12 +663,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
        case3(SGE, ISGE, USGE);
        case3(SLT, ISLT, USLT);
        
-      case2iu(SHL, SHL);
        case2iu(ISHR, USHR);
-      case2iu(NOT, NOT);
-      case2iu(AND, AND);
-      case2iu(OR, OR);
-      case2iu(XOR, XOR);
        
        default: break;
     }
@@ -722,16 +752,12 @@ void
  glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
                                 st_dst_reg dst, st_src_reg src0)
  {
-   st_src_reg tmp = get_temp(glsl_type::float_type);
+   int op = TGSI_OPCODE_ARL;
  
-   if (src0.type == GLSL_TYPE_INT)
-      emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0);
-   else if (src0.type == GLSL_TYPE_UINT)
-      emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0);
-   else
-      tmp = src0;
-   
-   emit(NULL, TGSI_OPCODE_ARL, dst, tmp);
+   if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
+      op = TGSI_OPCODE_UARL;
+
+   emit(NULL, op, dst, src0);
  }
  
  /**
@@ -864,7 +890,7 @@ glsl_to_tgsi_visitor::add_constant(gl_register_file file,
     }
  }
  
-struct st_src_reg
+st_src_reg
  glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
  {
     st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
@@ -876,7 +902,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
     return src;
  }
  
-struct st_src_reg
+st_src_reg
  glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
  {
     st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
@@ -890,7 +916,7 @@ glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
     return src;
  }
  
-struct st_src_reg
+st_src_reg
  glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
  {
     if (native_integers)
@@ -991,29 +1017,6 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
  
        fp->OriginUpperLeft = ir->origin_upper_left;
        fp->PixelCenterInteger = ir->pixel_center_integer;
-
-   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
-      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
-      switch (ir->depth_layout) {
-      case ir_depth_layout_none:
-         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE;
-         break;
-      case ir_depth_layout_any:
-         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY;
-         break;
-      case ir_depth_layout_greater:
-         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER;
-         break;
-      case ir_depth_layout_less:
-         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS;
-         break;
-      case ir_depth_layout_unchanged:
-         fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED;
-         break;
-      default:
-         assert(0);
-         break;
-      }
     }
  
     if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
@@ -1033,7 +1036,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
           }
        }
  
-      struct variable_storage *storage;
+      variable_storage *storage;
        st_dst_reg dst;
        if (i == ir->num_state_slots) {
           /* We'll set the index later. */
@@ -1184,7 +1187,7 @@ glsl_to_tgsi_visitor::visit(ir_function *ir)
     }
  }
  
-GLboolean
+bool
  glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
  {
     int nonmul_operand = 1 - mul_operand;
@@ -1210,7 +1213,47 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
     return true;
  }
  
-GLboolean
+/**
+ * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
+ *
+ * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
+ * implemented using multiplication, and logical-or is implemented using
+ * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
+ * As a result, the logical expression (a & !b) can be rewritten as:
+ *
+ *     - a * !b
+ *     - a * (1 - b)
+ *     - (a * 1) - (a * b)
+ *     - a + -(a * b)
+ *     - a + (a * -b)
+ *
+ * This final expression can be implemented as a single MAD(a, -b, a)
+ * instruction.  NOTE(review): relies on 1.0/0.0 bools; confirm for native_integers.
+ */
+bool
+glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
+{
+   const int other_operand = 1 - try_operand; /* index of the operand that is not the NOT candidate */
+   st_src_reg a, b;
+
+   ir_expression *expr = ir->operands[try_operand]->as_expression();
+   if (!expr || expr->operation != ir_unop_logic_not)
+      return false; /* candidate operand is not a logical NOT: nothing has been emitted */
+
+   ir->operands[other_operand]->accept(this); /* visit the plain operand: a */
+   a = this->result;
+   expr->operands[0]->accept(this); /* visit the NOT's argument rather than the NOT itself: b */
+   b = this->result;
+
+   b.negate = ~b.negate; /* complement the negate field so the MAD receives the negation of b */
+
+   this->result = get_temp(ir->type);
+   emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a); /* sources are (a, negated b, a) */
+
+   return true;
+}
+
+bool
  glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
  {
     /* Saturates were only introduced to vertex programs in
@@ -1291,6 +1334,16 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
        if (try_emit_mad(ir, 0))
           return;
     }
+
+   /* Quick peephole: Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
+    */
+   if (ir->operation == ir_binop_logic_and) {
+      if (try_emit_mad_for_and_not(ir, 1))
+        return;
+      if (try_emit_mad_for_and_not(ir, 0))
+        return;
+   }
+
     if (try_emit_sat(ir))
        return;
  
@@ -1337,7 +1390,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
     switch (ir->operation) {
     case ir_unop_logic_not:
        if (result_dst.type != GLSL_TYPE_FLOAT)
-         emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
+         emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
        else {
           /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
            * older GPUs implement SEQ using multiple instructions (i915 uses two
@@ -1437,10 +1490,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
        emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
        break;
     case ir_binop_greater:
-      emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
        break;
     case ir_binop_lequal:
-      emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
        break;
     case ir_binop_gequal:
        emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
@@ -1458,15 +1511,45 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
           st_src_reg temp = get_temp(native_integers ?
                 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
                 glsl_type::vec4_type);
-         assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
-         emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
           
-         /* After the dot-product, the value will be an integer on the
-          * range [0,4].  Zero becomes 1.0, and positive values become zero.
-          */
-         emit_dp(ir, result_dst, temp, temp, vector_elements);
-         
-         if (result_dst.type == GLSL_TYPE_FLOAT) {
+         if (native_integers) {
+            st_dst_reg temp_dst = st_dst_reg(temp);
+            st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
+            
+            emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
+            
+            /* Emit 1-3 AND operations to combine the SEQ results. */
+            switch (ir->operands[0]->type->vector_elements) {
+            case 2:
+               break;
+            case 3:
+               temp_dst.writemask = WRITEMASK_Y;
+               temp1.swizzle = SWIZZLE_YYYY;
+               temp2.swizzle = SWIZZLE_ZZZZ;
+               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
+               break;
+            case 4:
+               temp_dst.writemask = WRITEMASK_X;
+               temp1.swizzle = SWIZZLE_XXXX;
+               temp2.swizzle = SWIZZLE_YYYY;
+               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
+               temp_dst.writemask = WRITEMASK_Y;
+               temp1.swizzle = SWIZZLE_ZZZZ;
+               temp2.swizzle = SWIZZLE_WWWW;
+               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
+            }
+            
+            temp1.swizzle = SWIZZLE_XXXX;
+            temp2.swizzle = SWIZZLE_YYYY;
+            emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
+         } else {
+            emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+            
+            /* After the dot-product, the value will be an integer on the
+             * range [0,4].  Zero becomes 1.0, and positive values become zero.
+             */
+            emit_dp(ir, result_dst, temp, temp, vector_elements);
+
              /* Negating the result of the dot-product gives values on the range
               * [-4, 0].  Zero becomes 1.0, and negative values become zero.
               * This is achieved using SGE.
@@ -1474,11 +1557,6 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
              st_src_reg sge_src = result_src;
              sge_src.negate = ~sge_src.negate;
              emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
-         } else {
-            /* The TGSI negate flag doesn't work for integers, so use SEQ 0
-             * instead.
-             */
-            emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_int(0));
           }
        } else {
           emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
@@ -1491,30 +1569,56 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
           st_src_reg temp = get_temp(native_integers ?
                 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
                 glsl_type::vec4_type);
-         assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
           emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
  
-         /* After the dot-product, the value will be an integer on the
-          * range [0,4].  Zero stays zero, and positive values become 1.0.
-          */
-         glsl_to_tgsi_instruction *const dp =
-               emit_dp(ir, result_dst, temp, temp, vector_elements);
-         if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
-             result_dst.type == GLSL_TYPE_FLOAT) {
-            /* The clamping to [0,1] can be done for free in the fragment
-             * shader with a saturate.
-             */
-            dp->saturate = true;
-         } else if (result_dst.type == GLSL_TYPE_FLOAT) {
-            /* Negating the result of the dot-product gives values on the range
-             * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
-             * achieved using SLT.
-             */
-            st_src_reg slt_src = result_src;
-            slt_src.negate = ~slt_src.negate;
-            emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+         if (native_integers) {
+            st_dst_reg temp_dst = st_dst_reg(temp);
+            st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
+            
+            /* Emit 1-3 OR operations to combine the SNE results. */
+            switch (ir->operands[0]->type->vector_elements) {
+            case 2:
+               break;
+            case 3:
+               temp_dst.writemask = WRITEMASK_Y;
+               temp1.swizzle = SWIZZLE_YYYY;
+               temp2.swizzle = SWIZZLE_ZZZZ;
+               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
+               break;
+            case 4:
+               temp_dst.writemask = WRITEMASK_X;
+               temp1.swizzle = SWIZZLE_XXXX;
+               temp2.swizzle = SWIZZLE_YYYY;
+               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
+               temp_dst.writemask = WRITEMASK_Y;
+               temp1.swizzle = SWIZZLE_ZZZZ;
+               temp2.swizzle = SWIZZLE_WWWW;
+               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
+            }
+            
+            temp1.swizzle = SWIZZLE_XXXX;
+            temp2.swizzle = SWIZZLE_YYYY;
+            emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2);
           } else {
-            emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+            /* After the dot-product, the value will be an integer on the
+             * range [0,4].  Zero stays zero, and positive values become 1.0.
+             */
+            glsl_to_tgsi_instruction *const dp =
+                  emit_dp(ir, result_dst, temp, temp, vector_elements);
+            if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+               /* The clamping to [0,1] can be done for free in the fragment
+                * shader with a saturate.
+                */
+               dp->saturate = true;
+            } else {
+               /* Negating the result of the dot-product gives values on the range
+                * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
+                * is achieved using SLT.
+                */
+               st_src_reg slt_src = result_src;
+               slt_src.negate = ~slt_src.negate;
+               emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+            }
           }
        } else {
           emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
@@ -1553,41 +1657,52 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
     }
  
     case ir_binop_logic_xor:
-      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
+      if (native_integers)
+         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
+      else
+         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
        break;
  
     case ir_binop_logic_or: {
-      /* After the addition, the value will be an integer on the
-       * range [0,2].  Zero stays zero, and positive values become 1.0.
-       */
-      glsl_to_tgsi_instruction *add =
-         emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
-      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
-          result_dst.type == GLSL_TYPE_FLOAT) {
-         /* The clamping to [0,1] can be done for free in the fragment
-          * shader with a saturate if floats are being used as boolean values.
-          */
-         add->saturate = true;
-      } else if (result_dst.type == GLSL_TYPE_FLOAT) {
-         /* Negating the result of the addition gives values on the range
-          * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
-          * is achieved using SLT.
+      if (native_integers) {
+         /* If integers are used as booleans, we can use an actual "or" 
+          * instruction.
            */
-         st_src_reg slt_src = result_src;
-         slt_src.negate = ~slt_src.negate;
-         emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+         assert(native_integers);
+         emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
        } else {
-         /* Use an SNE on the result of the addition.  Zero stays zero,
-          * 1 stays 1, and 2 becomes 1.
+         /* After the addition, the value will be an integer on the
+          * range [0,2].  Zero stays zero, and positive values become 1.0.
            */
-         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+         glsl_to_tgsi_instruction *add =
+            emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
+         if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+            /* The clamping to [0,1] can be done for free in the fragment
+             * shader with a saturate if floats are being used as boolean values.
+             */
+            add->saturate = true;
+         } else {
+            /* Negating the result of the addition gives values on the range
+             * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
+             * is achieved using SLT.
+             */
+            st_src_reg slt_src = result_src;
+            slt_src.negate = ~slt_src.negate;
+            emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+         }
        }
        break;
     }
  
     case ir_binop_logic_and:
-      /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
-      emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
+      /* If native integers are disabled, the bool args are stored as float 0.0
+       * or 1.0, so "mul" gives us "and".  If they're enabled, just use the
+       * actual AND opcode.
+       */
+      if (native_integers)
+         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
+      else
+         emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
        break;
  
     case ir_binop_dot:
@@ -1610,18 +1725,36 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
        emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
        break;
     case ir_unop_i2f:
-   case ir_unop_b2f:
        if (native_integers) {
           emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
           break;
        }
+      /* fallthrough to next case otherwise */
+   case ir_unop_b2f:
+      if (native_integers) {
+         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
+         break;
+      }
+      /* fallthrough to next case otherwise */
     case ir_unop_i2u:
     case ir_unop_u2i:
        /* Converting between signed and unsigned integers is a no-op. */
-   case ir_unop_b2i:
-      /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */
        result_src = op[0];
        break;
+   case ir_unop_b2i:
+      if (native_integers) {
+         /* Booleans are stored as integers using ~0 for true and 0 for false.
+          * GLSL requires that int(bool) return 1 for true and 0 for false.
+          * This conversion is done with AND, but it could be done with NEG.
+          */
+         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
+      } else {
+         /* Booleans and integers are both stored as floats when native 
+          * integers are disabled.
+          */
+         result_src = op[0];
+      }
+      break;
     case ir_unop_f2i:
        if (native_integers)
           emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
@@ -1629,9 +1762,13 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
           emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
        break;
     case ir_unop_f2b:
+      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
+      break;
     case ir_unop_i2b:
-      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], 
-            st_src_reg_for_type(result_dst.type, 0));
+      if (native_integers)
+         emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
+      else
+         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
        break;
     case ir_unop_trunc:
        emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
@@ -1659,7 +1796,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
        break;
  
     case ir_unop_bit_not:
-      if (glsl_version >= 130) {
+      if (native_integers) {
           emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
           break;
        }
@@ -1669,27 +1806,27 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
           break;
        }
     case ir_binop_lshift:
-      if (glsl_version >= 130) {
+      if (native_integers) {
           emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]);
           break;
        }
     case ir_binop_rshift:
-      if (glsl_version >= 130) {
+      if (native_integers) {
           emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]);
           break;
        }
     case ir_binop_bit_and:
-      if (glsl_version >= 130) {
+      if (native_integers) {
           emit(ir, TGSI_OPCODE_AND, result_dst, op[0]);
           break;
        }
     case ir_binop_bit_xor:
-      if (glsl_version >= 130) {
+      if (native_integers) {
           emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]);
           break;
        }
     case ir_binop_bit_or:
-      if (glsl_version >= 130) {
+      if (native_integers) {
           emit(ir, TGSI_OPCODE_OR, result_dst, op[0]);
           break;
        }
@@ -1779,14 +1916,6 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
           entry = new(mem_ctx) variable_storage(var,
                                                 PROGRAM_INPUT,
                                                 var->location);
-         if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
-             var->location >= VERT_ATTRIB_GENERIC0) {
-            _mesa_add_attribute(this->prog->Attributes,
-                                var->name,
-                                _mesa_sizeof_glsl_type(var->type->gl_type),
-                                var->type->gl_type,
-                                var->location - VERT_ATTRIB_GENERIC0);
-         }
           break;
        case ir_var_out:
           assert(var->location != -1);
@@ -1846,17 +1975,19 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
        if (element_size == 1) {
           index_reg = this->result;
        } else {
-         index_reg = get_temp(glsl_type::float_type);
+         index_reg = get_temp(native_integers ?
+                              glsl_type::int_type : glsl_type::float_type);
  
           emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
-              this->result, st_src_reg_for_float(element_size));
+              this->result, st_src_reg_for_type(index_reg.type, element_size));
        }
  
        /* If there was already a relative address register involved, add the
         * new and the old together to get the new offset.
         */
        if (src.reladdr != NULL) {
-         st_src_reg accum_reg = get_temp(glsl_type::float_type);
+         st_src_reg accum_reg = get_temp(native_integers ?
+                                glsl_type::int_type : glsl_type::float_type);
  
           emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
                index_reg, *src.reladdr);
@@ -2077,12 +2208,25 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
  
        for (i = 0; i < type_size(ir->lhs->type); i++) {
           st_src_reg l_src = st_src_reg(l);
+         st_src_reg condition_temp = condition;
           l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements);
           
+         if (native_integers) {
+            /* This is necessary because TGSI's CMP instruction expects the
+             * condition to be a float, and we store booleans as integers.
+             * If TGSI had a UCMP instruction or similar, this extra
+             * instruction would not be necessary.
+             */
+            condition_temp = get_temp(glsl_type::vec4_type);
+            condition.negate = 0;
+            emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition);
+            condition_temp.swizzle = condition.swizzle;
+         }
+         
           if (switch_order) {
-            emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r);
+            emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r);
           } else {
-            emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src);
+            emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src);
           }
  
           l.index++;
@@ -2102,6 +2246,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
        inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
        new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
        new_inst->saturate = inst->saturate;
+      inst->dead_mask = inst->dst.writemask;
     } else {
        for (i = 0; i < type_size(ir->lhs->type); i++) {
           emit(ir, TGSI_OPCODE_MOV, l, r);
@@ -2369,21 +2514,23 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
  void
  glsl_to_tgsi_visitor::visit(ir_texture *ir)
  {
-   st_src_reg result_src, coord, lod_info, projector, dx, dy;
+   st_src_reg result_src, coord, lod_info, projector, dx, dy, offset;
     st_dst_reg result_dst, coord_dst;
     glsl_to_tgsi_instruction *inst = NULL;
     unsigned opcode = TGSI_OPCODE_NOP;
  
-   ir->coordinate->accept(this);
+   if (ir->coordinate) {
+      ir->coordinate->accept(this);
  
-   /* Put our coords in a temp.  We'll need to modify them for shadow,
-    * projection, or LOD, so the only case we'd use it as is is if
-    * we're doing plain old texturing.  The optimization passes on
-    * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
-    */
-   coord = get_temp(glsl_type::vec4_type);
-   coord_dst = st_dst_reg(coord);
-   emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
+      /* Put our coords in a temp.  We'll need to modify them for shadow,
+       * projection, or LOD, so the only case we'd use it as is is if
+       * we're doing plain old texturing.  The optimization passes on
+       * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
+       */
+      coord = get_temp(glsl_type::vec4_type);
+      coord_dst = st_dst_reg(coord);
+      emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
+   }
  
     if (ir->projector) {
        ir->projector->accept(this);
@@ -2417,11 +2564,24 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
        ir->lod_info.grad.dPdy->accept(this);
        dy = this->result;
        break;
-   case ir_txf: /* TODO: use TGSI_OPCODE_TXF here */
-      assert(!"GLSL 1.30 features unsupported");
+   case ir_txs:
+      opcode = TGSI_OPCODE_TXQ;
+      ir->lod_info.lod->accept(this);
+      lod_info = this->result;
+      break;
+   case ir_txf:
+      opcode = TGSI_OPCODE_TXF;
+      ir->lod_info.lod->accept(this);
+      lod_info = this->result;
+      if (ir->offset) {
+        ir->offset->accept(this);
+        offset = this->result;
+      }
        break;
     }
  
+   const glsl_type *sampler_type = ir->sampler->type;
+
     if (ir->projector) {
        if (opcode == TGSI_OPCODE_TEX) {
           /* Slot the projector in as the last component of the coord. */
@@ -2453,6 +2613,9 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
              tmp_src = get_temp(glsl_type::vec4_type);
              st_dst_reg tmp_dst = st_dst_reg(tmp_src);
  
+           /* Projective division not allowed for array samplers. */
+           assert(!sampler_type->sampler_array);
+
              tmp_dst.writemask = WRITEMASK_Z;
              emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
  
@@ -2477,12 +2640,21 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
         * coord.
         */
        ir->shadow_comparitor->accept(this);
-      coord_dst.writemask = WRITEMASK_Z;
+
+      /* XXX This will need to be updated for cubemap array samplers. */
+      if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
+          sampler_type->sampler_array) {
+         coord_dst.writemask = WRITEMASK_W;
+      } else {
+         coord_dst.writemask = WRITEMASK_Z;
+      }
+
        emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
        coord_dst.writemask = WRITEMASK_XYZW;
     }
  
-   if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB) {
+   if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
+       opcode == TGSI_OPCODE_TXF) {
        /* TGSI stores LOD or LOD bias in the last channel of the coords. */
        coord_dst.writemask = WRITEMASK_W;
        emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
@@ -2491,7 +2663,11 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
  
     if (opcode == TGSI_OPCODE_TXD)
        inst = emit(ir, opcode, result_dst, coord, dx, dy);
-   else
+   else if (opcode == TGSI_OPCODE_TXQ)
+      inst = emit(ir, opcode, result_dst, lod_info);
+   else if (opcode == TGSI_OPCODE_TXF) {
+      inst = emit(ir, opcode, result_dst, coord);
+   } else
        inst = emit(ir, opcode, result_dst, coord);
  
     if (ir->shadow_comparitor)
@@ -2501,7 +2677,14 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
                                                    this->shader_program,
                                                    this->prog);
  
-   const glsl_type *sampler_type = ir->sampler->type;
+   if (ir->offset) {
+       inst->tex_offset_num_offset = 1;
+       inst->tex_offsets[0].Index = offset.index;
+       inst->tex_offsets[0].File = offset.file;
+       inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0);
+       inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1);
+       inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2);
+   }
  
     switch (sampler_type->sampler_dimensionality) {
     case GLSL_SAMPLER_DIM_1D:
@@ -2524,6 +2707,9 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
     case GLSL_SAMPLER_DIM_BUF:
        assert(!"FINISHME: Implement ARB_texture_buffer_object");
        break;
+   case GLSL_SAMPLER_DIM_EXTERNAL:
+      inst->tex_target = TEXTURE_EXTERNAL_INDEX;
+      break;
     default:
        assert(!"Should not get here.");
     }
@@ -2664,171 +2850,6 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
     _mesa_update_shader_textures_used(prog);
  }
  
-
-/**
- * Check if the given vertex/fragment/shader program is within the
- * resource limits of the context (number of texture units, etc).
- * If any of those checks fail, record a linker error.
- *
- * XXX more checks are needed...
- */
-static void
-check_resources(const struct gl_context *ctx,
-                struct gl_shader_program *shader_program,
-                glsl_to_tgsi_visitor *prog,
-                struct gl_program *proginfo)
-{
-   switch (proginfo->Target) {
-   case GL_VERTEX_PROGRAM_ARB:
-      if (_mesa_bitcount(prog->samplers_used) >
-          ctx->Const.MaxVertexTextureImageUnits) {
-         fail_link(shader_program, "Too many vertex shader texture samplers");
-      }
-      if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
-         fail_link(shader_program, "Too many vertex shader constants");
-      }
-      break;
-   case MESA_GEOMETRY_PROGRAM:
-      if (_mesa_bitcount(prog->samplers_used) >
-          ctx->Const.MaxGeometryTextureImageUnits) {
-         fail_link(shader_program, "Too many geometry shader texture samplers");
-      }
-      if (proginfo->Parameters->NumParameters >
-          MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
-         fail_link(shader_program, "Too many geometry shader constants");
-      }
-      break;
-   case GL_FRAGMENT_PROGRAM_ARB:
-      if (_mesa_bitcount(prog->samplers_used) >
-          ctx->Const.MaxTextureImageUnits) {
-         fail_link(shader_program, "Too many fragment shader texture samplers");
-      }
-      if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
-         fail_link(shader_program, "Too many fragment shader constants");
-      }
-      break;
-   default:
-      _mesa_problem(ctx, "unexpected program type in check_resources()");
-   }
-}
-
-
-
-struct uniform_sort {
-   struct gl_uniform *u;
-   int pos;
-};
-
-/* The shader_program->Uniforms list is almost sorted in increasing
- * uniform->{Frag,Vert}Pos locations, but not quite when there are
- * uniforms shared between targets.  We need to add parameters in
- * increasing order for the targets.
- */
-static int
-sort_uniforms(const void *a, const void *b)
-{
-   struct uniform_sort *u1 = (struct uniform_sort *)a;
-   struct uniform_sort *u2 = (struct uniform_sort *)b;
-
-   return u1->pos - u2->pos;
-}
-
-/* Add the uniforms to the parameters.  The linker chose locations
- * in our parameters lists (which weren't created yet), which the
- * uniforms code will use to poke values into our parameters list
- * when uniforms are updated.
- */
-static void
-add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
-                               struct gl_shader *shader,
-                               struct gl_program *prog)
-{
-   unsigned int i;
-   unsigned int next_sampler = 0, num_uniforms = 0;
-   struct uniform_sort *sorted_uniforms;
-
-   sorted_uniforms = ralloc_array(NULL, struct uniform_sort,
-                                 shader_program->Uniforms->NumUniforms);
-
-   for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) {
-      struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i;
-      int parameter_index = -1;
-
-      switch (shader->Type) {
-      case GL_VERTEX_SHADER:
-         parameter_index = uniform->VertPos;
-         break;
-      case GL_FRAGMENT_SHADER:
-         parameter_index = uniform->FragPos;
-         break;
-      case GL_GEOMETRY_SHADER:
-         parameter_index = uniform->GeomPos;
-         break;
-      }
-
-      /* Only add uniforms used in our target. */
-      if (parameter_index != -1) {
-         sorted_uniforms[num_uniforms].pos = parameter_index;
-         sorted_uniforms[num_uniforms].u = uniform;
-         num_uniforms++;
-      }
-   }
-
-   qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort),
-         sort_uniforms);
-
-   for (i = 0; i < num_uniforms; i++) {
-      struct gl_uniform *uniform = sorted_uniforms[i].u;
-      int parameter_index = sorted_uniforms[i].pos;
-      const glsl_type *type = uniform->Type;
-      unsigned int size;
-
-      if (type->is_vector() ||
-          type->is_scalar()) {
-         size = type->vector_elements;
-      } else {
-         size = type_size(type) * 4;
-      }
-
-      gl_register_file file;
-      if (type->is_sampler() ||
-          (type->is_array() && type->fields.array->is_sampler())) {
-         file = PROGRAM_SAMPLER;
-      } else {
-         file = PROGRAM_UNIFORM;
-      }
-
-      GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1,
-                                                uniform->Name);
-
-      if (index < 0) {
-         index = _mesa_add_parameter(prog->Parameters, file,
-                                    uniform->Name, size, type->gl_type,
-                                    NULL, NULL, 0x0);
-
-         /* Sampler uniform values are stored in prog->SamplerUnits,
-          * and the entry in that array is selected by this index we
-          * store in ParameterValues[].
-          */
-         if (file == PROGRAM_SAMPLER) {
-            for (unsigned int j = 0; j < size / 4; j++)
-               prog->Parameters->ParameterValues[index + j][0].f = next_sampler++;
-         }
-
-         /* The location chosen in the Parameters list here (returned
-          * from _mesa_add_uniform) has to match what the linker chose.
-          */
-         if (index != parameter_index) {
-            fail_link(shader_program, "Allocation of uniform `%s' to target "
-                     "failed (%d vs %d)\n",
-                     uniform->Name, index, parameter_index);
-         }
-      }
-   }
-
-   ralloc_free(sorted_uniforms);
-}
-
  static void
  set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
                         struct gl_shader_program *shader_program,
@@ -2890,43 +2911,13 @@ set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
                               element_type->matrix_columns,
                               element_type->vector_elements,
                               loc, 1, GL_FALSE, (GLfloat *)values);
-         loc += element_type->matrix_columns;
        } else {
           _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
                        values, element_type->gl_type);
-         loc += type_size(element_type);
        }
-   }
-}
  
-static void
-set_uniform_initializers(struct gl_context *ctx,
-                        struct gl_shader_program *shader_program)
-{
-   void *mem_ctx = NULL;
-
-   for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) {
-      struct gl_shader *shader = shader_program->_LinkedShaders[i];
-
-      if (shader == NULL)
-         continue;
-
-      foreach_iter(exec_list_iterator, iter, *shader->ir) {
-         ir_instruction *ir = (ir_instruction *)iter.get();
-         ir_variable *var = ir->as_variable();
-
-         if (!var || var->mode != ir_var_uniform || !var->constant_value)
-            continue;
-
-         if (!mem_ctx)
-            mem_ctx = ralloc_context(NULL);
-
-         set_uniform_initializer(ctx, mem_ctx, shader_program, var->name,
-                                var->type, var->constant_value);
-      }
+      loc++;
     }
-
-   ralloc_free(mem_ctx);
  }
  
  /*
@@ -2946,9 +2937,13 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
     GLint outputMap[VERT_RESULT_MAX];
     GLint outputTypes[VERT_RESULT_MAX];
     GLuint numVaryingReads = 0;
-   GLboolean usedTemps[MAX_TEMPS];
+   GLboolean *usedTemps;
     GLuint firstTemp = 0;
  
+   usedTemps = new GLboolean[MAX_TEMPS];
+   if (!usedTemps) {
+      return;
+   }
     _mesa_find_used_registers(prog, PROGRAM_TEMPORARY,
                               usedTemps, MAX_TEMPS);
  
@@ -2981,6 +2976,8 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
        }
     }
  
+   delete [] usedTemps;
+
     if (numVaryingReads == 0)
        return; /* nothing to be done */
  
@@ -3052,9 +3049,13 @@ get_src_arg_mask(st_dst_reg dst, st_src_reg src)
  void
  glsl_to_tgsi_visitor::simplify_cmp(void)
  {
-   unsigned tempWrites[MAX_TEMPS];
+   unsigned *tempWrites;
     unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
  
+   tempWrites = new unsigned[MAX_TEMPS];
+   if (!tempWrites) {
+      return;
+   }
     memset(tempWrites, 0, sizeof(tempWrites));
     memset(outputWrites, 0, sizeof(outputWrites));
  
@@ -3070,7 +3071,7 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
            inst->op == TGSI_OPCODE_END ||
            inst->op == TGSI_OPCODE_ENDSUB ||
            inst->op == TGSI_OPCODE_RET) {
-         return;
+         break;
        }
  
        if (inst->dst.file == PROGRAM_OUTPUT) {
@@ -3095,6 +3096,8 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
           inst->src[0] = inst->src[1];
        }
     }
+
+   delete [] tempWrites;
  }
  
  /* Replaces all references to a temporary register index with another index. */
@@ -3511,25 +3514,23 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
           break;
  
        case TGSI_OPCODE_ENDIF:
-         --level;
-         break;
-
        case TGSI_OPCODE_ELSE:
-         /* Clear all channels written inside the preceding if block from the
-          * write array, but leave those that were not touched.
-          *
-          * FIXME: This destroys opportunities to remove dead code inside of
-          * IF blocks that are followed by an ELSE block.
+         /* Promote the recorded level of all channels written inside the preceding
+          * if or else block to the level above the if/else block.
            */
           for (int r = 0; r < this->next_temp; r++) {
              for (int c = 0; c < 4; c++) {
                 if (!writes[4 * r + c])
                          continue;
  
-               if (write_level[4 * r + c] >= level)
-                        writes[4 * r + c] = NULL;
+               if (write_level[4 * r + c] == level)
+                        write_level[4 * r + c] = level-1;
              }
           }
+
+         if(inst->op == TGSI_OPCODE_ENDIF)
+            --level;
+         
           break;
  
        case TGSI_OPCODE_IF:
@@ -3602,7 +3603,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
        
        if (!inst->dead_mask || !inst->dst.writemask)
           continue;
-      else if (inst->dead_mask == inst->dst.writemask) {
+      else if ((inst->dst.writemask & ~inst->dead_mask) == 0) {
           iter.remove();
           delete inst;
           removed++;
@@ -3728,7 +3729,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
     inst->sampler = 0;
     inst->tex_target = TEXTURE_2D_INDEX;
  
-   prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
+   prog->InputsRead |= FRAG_BIT_TEX0;
     prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
     v->samplers_used |= (1 << 0);
  
@@ -3799,7 +3800,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
              src_regs[i].index = src0.index;
           }
           else if (src_regs[i].file == PROGRAM_INPUT)
-            prog->InputsRead |= (1 << src_regs[i].index);
+            prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
        }
  
        v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
@@ -3808,8 +3809,6 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
     /* Make modifications to fragment program info. */
     prog->Parameters = _mesa_combine_parameter_lists(params,
                                                      original->prog->Parameters);
-   prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes);
-   prog->Varying = _mesa_clone_parameter_list(original->prog->Varying);
     _mesa_free_parameter_list(params);
     count_resources(v, prog);
     fp->glsl_to_tgsi = v;
@@ -3854,7 +3853,7 @@ get_bitmap_visitor(struct st_fragment_program *fp,
     inst->sampler = samplerIndex;
     inst->tex_target = TEXTURE_2D_INDEX;
  
-   prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
+   prog->InputsRead |= FRAG_BIT_TEX0;
     prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
     v->samplers_used |= (1 << samplerIndex);
  
@@ -3876,7 +3875,7 @@ get_bitmap_visitor(struct st_fragment_program *fp,
        for (int i=0; i<3; i++) {
           src_regs[i] = inst->src[i];
           if (src_regs[i].file == PROGRAM_INPUT)
-            prog->InputsRead |= (1 << src_regs[i].index);
+            prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
        }
  
        v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
@@ -3884,8 +3883,6 @@ get_bitmap_visitor(struct st_fragment_program *fp,
  
     /* Make modifications to fragment program info. */
     prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
-   prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes);
-   prog->Varying = _mesa_clone_parameter_list(original->prog->Varying);
     count_resources(v, prog);
     fp->glsl_to_tgsi = v;
  }
@@ -3944,6 +3941,7 @@ struct st_translate {
  /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
  static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
     TGSI_SEMANTIC_FACE,
+   TGSI_SEMANTIC_VERTEXID,
     TGSI_SEMANTIC_INSTANCEID
  };
  
@@ -4182,14 +4180,33 @@ translate_src(struct st_translate *t, const st_src_reg *src_reg)
     return src;
  }
  
+static struct tgsi_texture_offset /* returns, by value, a copy of *in_offset with its File retagged for TGSI */
+translate_tex_offset(struct st_translate *t, /* translation context; not referenced in this body */
+                     const struct tgsi_texture_offset *in_offset) /* offset whose File still holds a Mesa PROGRAM_* value */
+{
+   struct tgsi_texture_offset offset;
+
+   assert(in_offset->File == PROGRAM_IMMEDIATE); /* only immediate offsets are accepted here */
+
+   offset.File = TGSI_FILE_IMMEDIATE; /* replace the Mesa register file with the TGSI immediate file */
+   offset.Index = in_offset->Index; /* NOTE(review): copied verbatim, not remapped through t -- confirm immediate numbering matches */
+   offset.SwizzleX = in_offset->SwizzleX; /* X/Y/Z swizzles are carried over unchanged */
+   offset.SwizzleY = in_offset->SwizzleY;
+   offset.SwizzleZ = in_offset->SwizzleZ;
+
+   return offset;
+}
+
  static void
  compile_tgsi_instruction(struct st_translate *t,
-                         const struct glsl_to_tgsi_instruction *inst)
+                         const glsl_to_tgsi_instruction *inst)
  {
     struct ureg_program *ureg = t->ureg;
     GLuint i;
     struct ureg_dst dst[1];
     struct ureg_src src[4];
+   struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];
+
     unsigned num_dst;
     unsigned num_src;
  
@@ -4223,11 +4240,17 @@ compile_tgsi_instruction(struct st_translate *t,
     case TGSI_OPCODE_TXD:
     case TGSI_OPCODE_TXL:
     case TGSI_OPCODE_TXP:
+   case TGSI_OPCODE_TXQ:
+   case TGSI_OPCODE_TXF:
        src[num_src++] = t->samplers[inst->sampler];
+      for (i = 0; i < inst->tex_offset_num_offset; i++) {
+         texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
+      }
        ureg_tex_insn(ureg,
                      inst->op,
                      dst, num_dst, 
                      translate_texture_target(inst->tex_target, inst->tex_shadow),
+                    texoffsets, inst->tex_offset_num_offset,
                      src, num_src);
        return;
  
@@ -4246,37 +4269,15 @@ compile_tgsi_instruction(struct st_translate *t,
  }
  
  /**
- * Emit the TGSI instructions to adjust the WPOS pixel center convention
- * Basically, add (adjX, adjY) to the fragment position.
- */
-static void
-emit_adjusted_wpos(struct st_translate *t,
-                   const struct gl_program *program,
-                   float adjX, float adjY)
-{
-   struct ureg_program *ureg = t->ureg;
-   struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
-   struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
-
-   /* Note that we bias X and Y and pass Z and W through unchanged.
-    * The shader might also use gl_FragCoord.w and .z.
-    */
-   ureg_ADD(ureg, wpos_temp, wpos_input,
-            ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f));
-
-   t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
-}
-
-
-/**
- * Emit the TGSI instructions for inverting the WPOS y coordinate.
+ * Emit the TGSI instructions for inverting and adjusting WPOS.
   * This code is unavoidable because it also depends on whether
   * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
   */
  static void
-emit_wpos_inversion(struct st_translate *t,
-                    const struct gl_program *program,
-                    bool invert)
+emit_wpos_adjustment( struct st_translate *t,
+                      const struct gl_program *program,
+                      boolean invert,
+                      GLfloat adjX, GLfloat adjY[2])
  {
     struct ureg_program *ureg = t->ureg;
  
@@ -4295,35 +4296,55 @@ emit_wpos_inversion(struct st_translate *t,
     unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
                                                         wposTransformState);
  
-   struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst);
-   struct ureg_dst wpos_temp;
+   struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
+   struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
     struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
  
-   /* MOV wpos_temp, input[wpos]
-    */
-   if (wpos_input.File == TGSI_FILE_TEMPORARY)
-      wpos_temp = ureg_dst(wpos_input);
-   else {
-      wpos_temp = ureg_DECL_temporary(ureg);
-      ureg_MOV(ureg, wpos_temp, wpos_input);
+   /* First, apply the coordinate shift: */
+   if (adjX || adjY[0] || adjY[1]) {
+      if (adjY[0] != adjY[1]) {
+         /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
+          * depending on whether inversion is actually going to be applied
+          * or not, which is determined by testing against the inversion
+          * state variable used below, which will be either +1 or -1.
+          */
+         struct ureg_dst adj_temp = ureg_DECL_temporary(ureg);
+
+         ureg_CMP(ureg, adj_temp,
+                  ureg_scalar(wpostrans, invert ? 2 : 0),
+                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
+                  ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
+         ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
+      } else {
+         ureg_ADD(ureg, wpos_temp, wpos_input,
+                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
+      }
+      wpos_input = ureg_src(wpos_temp);
+   } else {
+      /* MOV wpos_temp, input[wpos]
+       */
+      ureg_MOV( ureg, wpos_temp, wpos_input );
     }
  
+   /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
+    * inversion/identity, or the other way around if we're drawing to an FBO.
+    */
     if (invert) {
        /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
         */
-      ureg_MAD(ureg,
-               ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
-               wpos_input,
-               ureg_scalar(wpostrans, 0),
-               ureg_scalar(wpostrans, 1));
+      ureg_MAD( ureg,
+                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
+                wpos_input,
+                ureg_scalar(wpostrans, 0),
+                ureg_scalar(wpostrans, 1));
     } else {
        /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
         */
-      ureg_MAD(ureg,
-               ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
-               wpos_input,
-               ureg_scalar(wpostrans, 2),
-               ureg_scalar(wpostrans, 3));
+      ureg_MAD( ureg,
+                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
+                wpos_input,
+                ureg_scalar(wpostrans, 2),
+                ureg_scalar(wpostrans, 3));
     }
  
     /* Use wpos_temp as position input from here on:
@@ -4344,8 +4365,37 @@ emit_wpos(struct st_context *st,
     const struct gl_fragment_program *fp =
        (const struct gl_fragment_program *) program;
     struct pipe_screen *pscreen = st->pipe->screen;
+   GLfloat adjX = 0.0f;
+   GLfloat adjY[2] = { 0.0f, 0.0f };
     boolean invert = FALSE;
  
+   /* Query the pixel center conventions supported by the pipe driver and set
+    * adjX, adjY to help out if it cannot handle the requested one internally.
+    *
+    * The bias of the y-coordinate depends on whether y-inversion takes place
+    * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
+    * drawing to an FBO (causes additional inversion), and whether the pipe
+    * driver origin and the requested origin differ (the latter condition is
+    * stored in the 'invert' variable).
+    *
+    * For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
+    *
+    * center shift only:
+    * i -> h: +0.5
+    * h -> i: -0.5
+    *
+    * inversion only:
+    * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
+    * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
+    * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
+    * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
+    *
+    * inversion and center shift:
+    * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
+    * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
+    * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
+    * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
+    */
     if (fp->OriginUpperLeft) {
        /* Fragment shader wants origin in upper-left */
        if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
@@ -4373,12 +4423,17 @@ emit_wpos(struct st_context *st,
     
     if (fp->PixelCenterInteger) {
        /* Fragment shader wants pixel center integer */
-      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER))
+      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
           /* the driver supports pixel center integer */
+         adjY[1] = 1.0f;
           ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
-      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER))
+      }
+      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
           /* the driver supports pixel center half integer, need to bias X,Y */
-         emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f);
+         adjX = -0.5f;
+         adjY[0] = -0.5f;
+         adjY[1] = 0.5f;
+      }
        else
           assert(0);
     }
@@ -4389,8 +4444,8 @@ emit_wpos(struct st_context *st,
        }
        else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
           /* the driver supports pixel center integer, need to bias X,Y */
+         adjX = adjY[0] = adjY[1] = 0.5f;
           ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
-         emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f);
        }
        else
           assert(0);
@@ -4398,7 +4453,7 @@ emit_wpos(struct st_context *st,
  
     /* we invert after adjustment so that we avoid the MOV to temporary,
      * and reuse the adjustment ADD instead */
-   emit_wpos_inversion(t, program, invert);
+   emit_wpos_adjustment(t, program, invert, adjX, adjY);
  }
  
  /**
@@ -4469,14 +4524,19 @@ st_translate_program(
     const ubyte outputSemanticIndex[],
     boolean passthrough_edgeflags)
  {
-   struct st_translate translate, *t;
+   struct st_translate *t;
     unsigned i;
     enum pipe_error ret = PIPE_OK;
  
     assert(numInputs <= Elements(t->inputs));
     assert(numOutputs <= Elements(t->outputs));
  
-   t = &translate;
+   t = CALLOC_STRUCT(st_translate);
+   if (!t) {
+      ret = PIPE_ERROR_OUT_OF_MEMORY;
+      goto out;
+   }
+
     memset(t, 0, sizeof *t);
  
     t->procType = procType;
@@ -4497,6 +4557,25 @@ st_translate_program(
                                             interpMode[i]);
        }
  
+      if (program->shader_program->FragDepthLayout != FRAG_DEPTH_LAYOUT_NONE) {
+         switch (program->shader_program->FragDepthLayout) {
+         case FRAG_DEPTH_LAYOUT_ANY:
+            ureg_property_fs_depth_layout(ureg, TGSI_FS_DEPTH_LAYOUT_ANY);
+            break;
+         case FRAG_DEPTH_LAYOUT_GREATER:
+            ureg_property_fs_depth_layout(ureg, TGSI_FS_DEPTH_LAYOUT_GREATER);
+            break;
+         case FRAG_DEPTH_LAYOUT_LESS:
+            ureg_property_fs_depth_layout(ureg, TGSI_FS_DEPTH_LAYOUT_LESS);
+            break;
+         case FRAG_DEPTH_LAYOUT_UNCHANGED:
+            ureg_property_fs_depth_layout(ureg, TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
+            break;
+         default:
+            assert(0);
+         }
+      }
+
        if (proginfo->InputsRead & FRAG_BIT_WPOS) {
           /* Must do this after setting up t->inputs, and before
            * emitting constant references, below:
@@ -4531,7 +4610,8 @@ st_translate_program(
              break;
           default:
              assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
-            return PIPE_ERROR_BAD_INPUT;
+            ret = PIPE_ERROR_BAD_INPUT;
+            goto out;
           }
        }
     }
@@ -4712,13 +4792,17 @@ st_translate_program(
     }
  
  out:
-   FREE(t->insn);
-   FREE(t->labels);
-   FREE(t->constants);
-   FREE(t->immediates);
+   if (t) {
+      FREE(t->insn);
+      FREE(t->labels);
+      FREE(t->constants);
+      FREE(t->immediates);
+
+      if (t->error) {
+         debug_printf("%s: translate error flag set\n", __FUNCTION__);
+      }
  
-   if (t->error) {
-      debug_printf("%s: translate error flag set\n", __FUNCTION__);
+      FREE(t);
     }
  
     return ret;
@@ -4736,6 +4820,8 @@ get_mesa_program(struct gl_context *ctx,
  {
     glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor();
     struct gl_program *prog;
+   struct pipe_screen * screen = st_context(ctx)->pipe->screen;
+   unsigned pipe_shader_type;
     GLenum target;
     const char *target_string;
     bool progress;
@@ -4746,14 +4832,17 @@ get_mesa_program(struct gl_context *ctx,
     case GL_VERTEX_SHADER:
        target = GL_VERTEX_PROGRAM_ARB;
        target_string = "vertex";
+      pipe_shader_type = PIPE_SHADER_VERTEX;
        break;
     case GL_FRAGMENT_SHADER:
        target = GL_FRAGMENT_PROGRAM_ARB;
        target_string = "fragment";
+      pipe_shader_type = PIPE_SHADER_FRAGMENT;
        break;
     case GL_GEOMETRY_SHADER:
        target = GL_GEOMETRY_PROGRAM_NV;
        target_string = "geometry";
+      pipe_shader_type = PIPE_SHADER_GEOMETRY;
        break;
     default:
        assert(!"should not be reached");
@@ -4766,8 +4855,6 @@ get_mesa_program(struct gl_context *ctx,
     if (!prog)
        return NULL;
     prog->Parameters = _mesa_new_parameter_list();
-   prog->Varying = _mesa_new_parameter_list();
-   prog->Attributes = _mesa_new_parameter_list();
     v->ctx = ctx;
     v->prog = prog;
     v->shader_program = shader_program;
@@ -4775,7 +4862,8 @@ get_mesa_program(struct gl_context *ctx,
     v->glsl_version = ctx->Const.GLSLVersion;
     v->native_integers = ctx->Const.NativeIntegers;
  
-   add_uniforms_to_parameters_list(shader_program, shader, prog);
+   _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
+                                              prog->Parameters);
  
     /* Emit intermediate IR for main(). */
     visit_exec_list(shader->ir, v);
@@ -4823,10 +4911,13 @@ get_mesa_program(struct gl_context *ctx,
     }
  #endif
  
-   /* Remove reads to output registers, and to varyings in vertex shaders. */
-   v->remove_output_reads(PROGRAM_OUTPUT);
-   if (target == GL_VERTEX_PROGRAM_ARB)
-      v->remove_output_reads(PROGRAM_VARYING);
+   if (!screen->get_shader_param(screen, pipe_shader_type,
+                                 PIPE_SHADER_CAP_OUTPUT_READ)) {
+      /* Remove reads to output registers, and to varyings in vertex shaders. */
+      v->remove_output_reads(PROGRAM_OUTPUT);
+      if (target == GL_VERTEX_PROGRAM_ARB)
+         v->remove_output_reads(PROGRAM_VARYING);
+   }
     
     /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
     v->simplify_cmp();
@@ -4854,18 +4945,26 @@ get_mesa_program(struct gl_context *ctx,
        _mesa_print_ir(shader->ir, NULL);
        printf("\n");
        printf("\n");
+      fflush(stdout);
     }
  
     prog->Instructions = NULL;
     prog->NumInstructions = 0;
  
-   do_set_program_inouts(shader->ir, prog);
+   do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER);
     count_resources(v, prog);
  
-   check_resources(ctx, shader_program, v, prog);
-
     _mesa_reference_program(ctx, &shader->Program, prog);
     
+   /* This has to be done last.  Any operation that can cause
+    * prog->ParameterValues to get reallocated (e.g., anything that adds a
+    * program constant) has to happen before creating this linkage.
+    */
+   _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
+   if (!shader_program->LinkStatus) {
+      return NULL;
+   }
+
     struct st_vertex_program *stvp;
     struct st_fragment_program *stfp;
     struct st_geometry_program *stgp;
@@ -4946,19 +5045,21 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
           /* Lowering */
           do_mat_op_to_vec(ir);
           lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
-                                | LOG_TO_LOG2
+                                | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
                                  | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
  
           progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
  
-         progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
+         progress = do_common_optimization(ir, true, true,
+                                          options->MaxUnrollIterations)
+          || progress;
  
           progress = lower_quadop_vector(ir, false) || progress;
  
-         if (options->EmitNoIfs) {
+         if (options->MaxIfDepth == 0)
              progress = lower_discard(ir) || progress;
-            progress = lower_if_to_cond_assign(ir) || progress;
-         }
+
+         progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
  
           if (options->EmitNoNoise)
              progress = lower_noise(ir) || progress;
@@ -4991,29 +5092,18 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
        linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
  
        if (linked_prog) {
-         bool ok = true;
-
-         switch (prog->_LinkedShaders[i]->Type) {
-         case GL_VERTEX_SHADER:
-            _mesa_reference_vertprog(ctx, &prog->VertexProgram,
-                                     (struct gl_vertex_program *)linked_prog);
-            ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
-                                                 linked_prog);
-            break;
-         case GL_FRAGMENT_SHADER:
-            _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
-                                     (struct gl_fragment_program *)linked_prog);
-            ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
-                                                 linked_prog);
-            break;
-         case GL_GEOMETRY_SHADER:
-            _mesa_reference_geomprog(ctx, &prog->GeometryProgram,
-                                     (struct gl_geometry_program *)linked_prog);
-            ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV,
-                                                 linked_prog);
-            break;
-         }
-         if (!ok) {
+        static const GLenum targets[] = {
+           GL_VERTEX_PROGRAM_ARB,
+           GL_FRAGMENT_PROGRAM_ARB,
+           GL_GEOMETRY_PROGRAM_NV
+        };
+
+        _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
+                                linked_prog);
+         if (!ctx->Driver.ProgramStringNotify(ctx, targets[i], linked_prog)) {
+           _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
+                                   NULL);
+            _mesa_reference_program(ctx, &linked_prog, NULL);
              return GL_FALSE;
           }
        }
@@ -5024,53 +5114,4 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
     return GL_TRUE;
  }
  
-
-/**
- * Link a GLSL shader program.  Called via glLinkProgram().
- */
-void
-st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
-{
-   unsigned int i;
-
-   _mesa_clear_shader_program_data(ctx, prog);
-
-   prog->LinkStatus = GL_TRUE;
-
-   for (i = 0; i < prog->NumShaders; i++) {
-      if (!prog->Shaders[i]->CompileStatus) {
-         fail_link(prog, "linking with uncompiled shader");
-         prog->LinkStatus = GL_FALSE;
-      }
-   }
-
-   prog->Varying = _mesa_new_parameter_list();
-   _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
-   _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
-   _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL);
-
-   if (prog->LinkStatus) {
-      link_shaders(ctx, prog);
-   }
-
-   if (prog->LinkStatus) {
-      if (!ctx->Driver.LinkShader(ctx, prog)) {
-         prog->LinkStatus = GL_FALSE;
-      }
-   }
-
-   set_uniform_initializers(ctx, prog);
-
-   if (ctx->Shader.Flags & GLSL_DUMP) {
-      if (!prog->LinkStatus) {
-         printf("GLSL shader program %d failed to link\n", prog->Name);
-      }
-
-      if (prog->InfoLog && prog->InfoLog[0] != 0) {
-         printf("GLSL shader program %d info log:\n", prog->Name);
-         printf("%s\n", prog->InfoLog);
-      }
-   }
-}
-
  } /* extern "C" */