mesa: Simplify calling Driver.ProgramStringNotify after previous refactors

[mesa.git] / src / mesa / state_tracker / st_glsl_to_tgsi.cpp
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp

index 5fedf263090f2c28e4e68348c92cbbb9ac63880b..fe65ae53941cfc099e705208e436f06f2d07ace2 100644 (file)
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -70,6 +70,7 @@ extern "C" {
  #include "st_mesa_to_tgsi.h"
  }
  
+#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
  #define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) |  \
                             (1 << PROGRAM_ENV_PARAM) |    \
                             (1 << PROGRAM_STATE_VAR) |    \
@@ -77,6 +78,11 @@ extern "C" {
                             (1 << PROGRAM_CONSTANT) |     \
                             (1 << PROGRAM_UNIFORM))
  
+#define MAX_TEMPS         4096
+
+/* will be 4 for GLSL 4.00 */
+#define MAX_GLSL_TEXTURE_OFFSET 1
+
  class st_src_reg;
  class st_dst_reg;
  
@@ -171,7 +177,7 @@ st_src_reg::st_src_reg(st_dst_reg reg)
     this->index = reg.index;
     this->swizzle = SWIZZLE_XYZW;
     this->negate = 0;
-   this->reladdr = NULL;
+   this->reladdr = reg.reladdr;
  }
  
  st_dst_reg::st_dst_reg(st_src_reg reg)
@@ -208,6 +214,9 @@ public:
     int sampler; /**< sampler index */
     int tex_target; /**< One of TEXTURE_*_INDEX */
     GLboolean tex_shadow;
+   struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
+   unsigned tex_offset_num_offset;
+   int dead_mask; /**< Used in dead code elimination */
  
     class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
  };
@@ -225,6 +234,20 @@ public:
     ir_variable *var; /* variable that maps to this, if any */
  };
  
+class immediate_storage : public exec_node {
+public:
+   immediate_storage(gl_constant_value *values, int size, int type)
+   {
+      memcpy(this->values, values, size * sizeof(gl_constant_value));
+      this->size = size;
+      this->type = type;
+   }
+   
+   gl_constant_value values[4];
+   int size; /**< Number of components (1-4) */
+   int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
+};
+
  class function_entry : public exec_node {
  public:
     ir_function_signature *sig;
@@ -232,7 +255,7 @@ public:
     /**
      * identifier of this function signature used by the program.
      *
-    * At the point that Mesa instructions for function calls are
+    * At the point that TGSI instructions for function calls are
      * generated, we don't know the address of the first instruction of
      * the function body.  So we make the BranchTarget that is called a
      * small integer and rewrite them during set_branchtargets().
@@ -247,10 +270,9 @@ public:
     glsl_to_tgsi_instruction *bgn_inst;
  
     /**
-    * Index of the first instruction of the function body in actual
-    * Mesa IR.
+    * Index of the first instruction of the function body in actual TGSI.
      *
-    * Set after convertion from glsl_to_tgsi_instruction to prog_instruction.
+    * Set after conversion from glsl_to_tgsi_instruction to TGSI.
      */
     int inst;
  
@@ -278,9 +300,13 @@ public:
     bool indirect_addr_consts;
     
     int glsl_version;
+   bool native_integers;
  
     variable_storage *find_variable_storage(ir_variable *var);
  
+   int add_constant(gl_register_file file, gl_constant_value values[4],
+                    int size, int datatype, GLuint *swizzle_out);
+
     function_entry *get_function_signature(ir_function_signature *sig);
  
     st_src_reg get_temp(const glsl_type *type);
@@ -322,6 +348,10 @@ public:
     /** List of variable_storage */
     exec_list variables;
  
+   /** List of immediate_storage */
+   exec_list immediates;
+   int num_immediates;
+
     /** List of function_entry */
     exec_list function_signatures;
     int next_signature_id;
@@ -348,11 +378,11 @@ public:
     /**
      * Emit the correct dot-product instruction for the type of arguments
      */
-   void emit_dp(ir_instruction *ir,
-                st_dst_reg dst,
-                st_src_reg src0,
-                st_src_reg src1,
-                unsigned elements);
+   glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
+                                     st_dst_reg dst,
+                                     st_src_reg src0,
+                                     st_src_reg src1,
+                                     unsigned elements);
  
     void emit_scalar(ir_instruction *ir, unsigned op,
                     st_dst_reg dst, st_src_reg src0);
@@ -360,20 +390,25 @@ public:
     void emit_scalar(ir_instruction *ir, unsigned op,
                     st_dst_reg dst, st_src_reg src0, st_src_reg src1);
  
+   void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst);
+
     void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
  
     void emit_scs(ir_instruction *ir, unsigned op,
                  st_dst_reg dst, const st_src_reg &src);
  
-   GLboolean try_emit_mad(ir_expression *ir,
-                         int mul_operand);
-   GLboolean try_emit_sat(ir_expression *ir);
+   bool try_emit_mad(ir_expression *ir,
+              int mul_operand);
+   bool try_emit_mad_for_and_not(ir_expression *ir,
+              int mul_operand);
+   bool try_emit_sat(ir_expression *ir);
  
     void emit_swz(ir_expression *ir);
  
     bool process_move_condition(ir_rvalue *ir);
  
     void remove_output_reads(gl_register_file type);
+   void simplify_cmp(void);
  
     void rename_temp_register(int index, int new_index);
     int get_first_temp_read(int index);
@@ -383,13 +418,14 @@ public:
  
     void copy_propagate(void);
     void eliminate_dead_code(void);
+   int eliminate_dead_code_advanced(void);
     void merge_registers(void);
     void renumber_registers(void);
  
     void *mem_ctx;
  };
  
-static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, NULL);
+static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
  
  static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
  
@@ -479,10 +515,11 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
     inst->src[1] = src1;
     inst->src[2] = src2;
     inst->ir = ir;
+   inst->dead_mask = 0;
  
     inst->function = NULL;
     
-   if (op == TGSI_OPCODE_ARL)
+   if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL)
        this->num_address_regs = 1;
     
     /* Update indirect addressing status used by TGSI */
@@ -499,6 +536,9 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
        case PROGRAM_UNIFORM:
           this->indirect_addr_consts = true;
           break;
+      case PROGRAM_IMMEDIATE:
+         assert(!"immediates should not have indirect addressing");
+         break;
        default:
           break;
        }
@@ -518,6 +558,9 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
              case PROGRAM_UNIFORM:
                 this->indirect_addr_consts = true;
                 break;
+            case PROGRAM_IMMEDIATE:
+               assert(!"immediates should not have indirect addressing");
+               break;
              default:
                 break;
              }
@@ -526,7 +569,10 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
     }
  
     this->instructions.push_tail(inst);
-   
+
+   if (native_integers)
+      try_emit_float_set(ir, op, dst);
+
     return inst;
  }
  
@@ -552,11 +598,28 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
     return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
  }
  
+/**
+ * Emits the code to convert the result of float SET instructions to integers.
+ */
+void
+glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op,
+                        st_dst_reg dst)
+{
+   if ((op == TGSI_OPCODE_SEQ ||
+        op == TGSI_OPCODE_SNE ||
+        op == TGSI_OPCODE_SGE ||
+        op == TGSI_OPCODE_SLT))
+   {
+      st_src_reg src = st_src_reg(dst);
+      src.negate = ~src.negate;
+      dst.type = GLSL_TYPE_FLOAT;
+      emit(ir, TGSI_OPCODE_F2I, dst, src);
+   }
+}
+
  /**
   * Determines whether to use an integer, unsigned integer, or float opcode 
   * based on the operands and input opcode, then emits the result.
- * 
- * TODO: type checking for remaining TGSI opcodes
   */
  unsigned
  glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
@@ -567,8 +630,8 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
     
     if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
        type = GLSL_TYPE_FLOAT;
-   else if (glsl_version >= 130)
-      type = src0.type;
+   else if (native_integers)
+      type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
  
  #define case4(c, f, i, u) \
     case TGSI_OPCODE_##c: \
@@ -594,12 +657,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
        case3(SGE, ISGE, USGE);
        case3(SLT, ISLT, USLT);
        
-      case2iu(SHL, SHL);
        case2iu(ISHR, USHR);
-      case2iu(NOT, NOT);
-      case2iu(AND, AND);
-      case2iu(OR, OR);
-      case2iu(XOR, XOR);
        
        default: break;
     }
@@ -608,7 +666,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
     return op;
  }
  
-void
+glsl_to_tgsi_instruction *
  glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
                             st_dst_reg dst, st_src_reg src0, st_src_reg src1,
                             unsigned elements)
@@ -617,7 +675,7 @@ glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
        TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
     };
  
-   emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
+   return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
  }
  
  /**
@@ -688,16 +746,12 @@ void
  glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
                                 st_dst_reg dst, st_src_reg src0)
  {
-   st_src_reg tmp = get_temp(glsl_type::float_type);
+   int op = TGSI_OPCODE_ARL;
  
-   if (src0.type == GLSL_TYPE_INT)
-      emit(ir, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0);
-   else if (src0.type == GLSL_TYPE_UINT)
-      emit(ir, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0);
-   else
-      tmp = src0;
-   
-   emit(ir, TGSI_OPCODE_ARL, dst, tmp);
+   if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
+      op = TGSI_OPCODE_UARL;
+
+   emit(NULL, op, dst, src0);
  }
  
  /**
@@ -795,38 +849,71 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
     }
  }
  
-struct st_src_reg
+int
+glsl_to_tgsi_visitor::add_constant(gl_register_file file,
+                            gl_constant_value values[4], int size, int datatype,
+                            GLuint *swizzle_out)
+{
+   if (file == PROGRAM_CONSTANT) {
+      return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
+                                              size, datatype, swizzle_out);
+   } else {
+      int index = 0;
+      immediate_storage *entry;
+      assert(file == PROGRAM_IMMEDIATE);
+
+      /* Search immediate storage to see if we already have an identical
+       * immediate that we can use instead of adding a duplicate entry.
+       */
+      foreach_iter(exec_list_iterator, iter, this->immediates) {
+         entry = (immediate_storage *)iter.get();
+         
+         if (entry->size == size &&
+             entry->type == datatype &&
+             !memcmp(entry->values, values, size * sizeof(gl_constant_value))) {
+             return index;
+         }
+         index++;
+      }
+      
+      /* Add this immediate to the list. */
+      entry = new(mem_ctx) immediate_storage(values, size, datatype);
+      this->immediates.push_tail(entry);
+      this->num_immediates++;
+      return index;
+   }
+}
+
+st_src_reg
  glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
  {
-   st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_FLOAT);
+   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
     union gl_constant_value uval;
  
     uval.f = val;
-   src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
-                                         &uval, 1, GL_FLOAT, &src.swizzle);
+   src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);
  
     return src;
  }
  
-struct st_src_reg
+st_src_reg
  glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
  {
-   st_src_reg src(PROGRAM_CONSTANT, -1, GLSL_TYPE_INT);
+   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
     union gl_constant_value uval;
     
-   assert(glsl_version >= 130);
+   assert(native_integers);
  
     uval.i = val;
-   src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
-                                         &uval, 1, GL_INT, &src.swizzle);
+   src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
  
     return src;
  }
  
-struct st_src_reg
+st_src_reg
  glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
  {
-   if (glsl_version >= 130)
+   if (native_integers)
        return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : 
                                         st_src_reg_for_int(val);
     else
@@ -883,10 +970,8 @@ st_src_reg
  glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
  {
     st_src_reg src;
-   int swizzle[4];
-   int i;
  
-   src.type = glsl_version >= 130 ? type->base_type : GLSL_TYPE_FLOAT;
+   src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
     src.file = PROGRAM_TEMPORARY;
     src.index = next_temp;
     src.reladdr = NULL;
@@ -895,12 +980,7 @@ glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
     if (type->is_array() || type->is_record()) {
        src.swizzle = SWIZZLE_NOOP;
     } else {
-      for (i = 0; i < type->vector_elements; i++)
-         swizzle[i] = i;
-      for (; i < 4; i++)
-         swizzle[i] = type->vector_elements - 1;
-      src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
-                                 swizzle[2], swizzle[3]);
+      src.swizzle = swizzle_for_size(type->vector_elements);
     }
     src.negate = 0;
  
@@ -973,7 +1053,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
           }
        }
  
-      struct variable_storage *storage;
+      variable_storage *storage;
        st_dst_reg dst;
        if (i == ir->num_state_slots) {
           /* We'll set the index later. */
@@ -994,7 +1074,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
           this->next_temp += type_size(ir->type);
  
           dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
-               glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT));
+               native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
        }
  
  
@@ -1010,7 +1090,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
              }
           } else {
              st_src_reg src(PROGRAM_STATE_VAR, index,
-                  glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT);
+                  native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT);
              src.swizzle = slots[i].swizzle;
              emit(ir, TGSI_OPCODE_MOV, dst, src);
              /* even a float takes up a whole vec4 reg in a struct/array. */
@@ -1124,11 +1204,12 @@ glsl_to_tgsi_visitor::visit(ir_function *ir)
     }
  }
  
-GLboolean
+bool
  glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
  {
     int nonmul_operand = 1 - mul_operand;
     st_src_reg a, b, c;
+   st_dst_reg result_dst;
  
     ir_expression *expr = ir->operands[mul_operand]->as_expression();
     if (!expr || expr->operation != ir_binop_mul)
@@ -1142,12 +1223,54 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
     c = this->result;
  
     this->result = get_temp(ir->type);
-   emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, c);
+   result_dst = st_dst_reg(this->result);
+   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+   emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
  
     return true;
  }
  
-GLboolean
+/**
+ * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
+ *
+ * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
+ * implemented using multiplication, and logical-or is implemented using
+ * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
+ * As a result, the logical expression (a & !b) can be rewritten as:
+ *
+ *     - a * !b
+ *     - a * (1 - b)
+ *     - (a * 1) - (a * b)
+ *     - a + -(a * b)
+ *     - a + (a * -b)
+ *
+ * This final expression can be implemented as a single MAD(a, -b, a)
+ * instruction.
+ */
+bool
+glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
+{
+   const int other_operand = 1 - try_operand;
+   st_src_reg a, b;
+
+   ir_expression *expr = ir->operands[try_operand]->as_expression();
+   if (!expr || expr->operation != ir_unop_logic_not)
+      return false;
+
+   ir->operands[other_operand]->accept(this);
+   a = this->result;
+   expr->operands[0]->accept(this);
+   b = this->result;
+
+   b.negate = ~b.negate;
+
+   this->result = get_temp(ir->type);
+   emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
+
+   return true;
+}
+
+bool
  glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
  {
     /* Saturates were only introduced to vertex programs in
@@ -1163,10 +1286,32 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
     sat_src->accept(this);
     st_src_reg src = this->result;
  
-   this->result = get_temp(ir->type);
-   glsl_to_tgsi_instruction *inst;
-   inst = emit(ir, TGSI_OPCODE_MOV, st_dst_reg(this->result), src);
-   inst->saturate = true;
+   /* If we generated an expression instruction into a temporary in
+    * processing the saturate's operand, apply the saturate to that
+    * instruction.  Otherwise, generate a MOV to do the saturate.
+    *
+    * Note that we have to be careful to only do this optimization if
+    * the instruction in question was what generated src->result.  For
+    * example, ir_dereference_array might generate a MUL instruction
+    * to create the reladdr, and return us a src reg using that
+    * reladdr.  That MUL result is not the value we're trying to
+    * saturate.
+    */
+   ir_expression *sat_src_expr = sat_src->as_expression();
+   if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
+                       sat_src_expr->operation == ir_binop_add ||
+                       sat_src_expr->operation == ir_binop_dot)) {
+      glsl_to_tgsi_instruction *new_inst;
+      new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+      new_inst->saturate = true;
+   } else {
+      this->result = get_temp(ir->type);
+      st_dst_reg result_dst = st_dst_reg(this->result);
+      result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+      glsl_to_tgsi_instruction *inst;
+      inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
+      inst->saturate = true;
+   }
  
     return true;
  }
@@ -1206,6 +1351,16 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
        if (try_emit_mad(ir, 0))
           return;
     }
+
+   /* Quick peephole: Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b))
+    */
+   if (ir->operation == ir_binop_logic_and) {
+      if (try_emit_mad_for_and_not(ir, 1))
+        return;
+      if (try_emit_mad_for_and_not(ir, 0))
+        return;
+   }
+
     if (try_emit_sat(ir))
        return;
  
@@ -1251,7 +1406,17 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
  
     switch (ir->operation) {
     case ir_unop_logic_not:
-      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
+      if (result_dst.type != GLSL_TYPE_FLOAT)
+         emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
+      else {
+         /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
+          * older GPUs implement SEQ using multiple instructions (i915 uses two
+          * SGE instructions and a MUL instruction).  Since our logic values are
+          * 0.0 and 1.0, 1-x also implements !x.
+          */
+         op[0].negate = ~op[0].negate;
+         emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
+      }
        break;
     case ir_unop_neg:
        assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT);
@@ -1342,10 +1507,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
        emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
        break;
     case ir_binop_greater:
-      emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
        break;
     case ir_binop_lequal:
-      emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]);
+      emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
        break;
     case ir_binop_gequal:
        emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
@@ -1360,13 +1525,56 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
        /* "==" operator producing a scalar boolean. */
        if (ir->operands[0]->type->is_vector() ||
            ir->operands[1]->type->is_vector()) {
-         st_src_reg temp = get_temp(glsl_version >= 130 ? 
+         st_src_reg temp = get_temp(native_integers ?
                 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
                 glsl_type::vec4_type);
-         assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
-         emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
-         emit_dp(ir, result_dst, temp, temp, vector_elements);
-         emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0));
+         
+         if (native_integers) {
+            st_dst_reg temp_dst = st_dst_reg(temp);
+            st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
+            
+            emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
+            
+            /* Emit 1-3 AND operations to combine the SEQ results. */
+            switch (ir->operands[0]->type->vector_elements) {
+            case 2:
+               break;
+            case 3:
+               temp_dst.writemask = WRITEMASK_Y;
+               temp1.swizzle = SWIZZLE_YYYY;
+               temp2.swizzle = SWIZZLE_ZZZZ;
+               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
+               break;
+            case 4:
+               temp_dst.writemask = WRITEMASK_X;
+               temp1.swizzle = SWIZZLE_XXXX;
+               temp2.swizzle = SWIZZLE_YYYY;
+               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
+               temp_dst.writemask = WRITEMASK_Y;
+               temp1.swizzle = SWIZZLE_ZZZZ;
+               temp2.swizzle = SWIZZLE_WWWW;
+               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
+            }
+            
+            temp1.swizzle = SWIZZLE_XXXX;
+            temp2.swizzle = SWIZZLE_YYYY;
+            emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
+         } else {
+            emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
+            
+            /* After the dot-product, the value will be an integer on the
+             * range [0,4].  Zero becomes 1.0, and positive values become zero.
+             */
+            emit_dp(ir, result_dst, temp, temp, vector_elements);
+
+            /* Negating the result of the dot-product gives values on the range
+             * [-4, 0].  Zero becomes 1.0, and negative values become zero.
+             * This is achieved using SGE.
+             */
+            st_src_reg sge_src = result_src;
+            sge_src.negate = ~sge_src.negate;
+            emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
+         }
        } else {
           emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
        }
@@ -1375,38 +1583,143 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
        /* "!=" operator producing a scalar boolean. */
        if (ir->operands[0]->type->is_vector() ||
            ir->operands[1]->type->is_vector()) {
-         st_src_reg temp = get_temp(glsl_version >= 130 ? 
+         st_src_reg temp = get_temp(native_integers ?
                 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
                 glsl_type::vec4_type);
-         assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
           emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
-         emit_dp(ir, result_dst, temp, temp, vector_elements);
-         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+
+         if (native_integers) {
+            st_dst_reg temp_dst = st_dst_reg(temp);
+            st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
+            
+            /* Emit 1-3 OR operations to combine the SNE results. */
+            switch (ir->operands[0]->type->vector_elements) {
+            case 2:
+               break;
+            case 3:
+               temp_dst.writemask = WRITEMASK_Y;
+               temp1.swizzle = SWIZZLE_YYYY;
+               temp2.swizzle = SWIZZLE_ZZZZ;
+               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
+               break;
+            case 4:
+               temp_dst.writemask = WRITEMASK_X;
+               temp1.swizzle = SWIZZLE_XXXX;
+               temp2.swizzle = SWIZZLE_YYYY;
+               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
+               temp_dst.writemask = WRITEMASK_Y;
+               temp1.swizzle = SWIZZLE_ZZZZ;
+               temp2.swizzle = SWIZZLE_WWWW;
+               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
+            }
+            
+            temp1.swizzle = SWIZZLE_XXXX;
+            temp2.swizzle = SWIZZLE_YYYY;
+            emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2);
+         } else {
+            /* After the dot-product, the value will be an integer on the
+             * range [0,4].  Zero stays zero, and positive values become 1.0.
+             */
+            glsl_to_tgsi_instruction *const dp =
+                  emit_dp(ir, result_dst, temp, temp, vector_elements);
+            if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+               /* The clamping to [0,1] can be done for free in the fragment
+                * shader with a saturate.
+                */
+               dp->saturate = true;
+            } else {
+               /* Negating the result of the dot-product gives values on the range
+                * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
+                * is achieved using SLT.
+                */
+               st_src_reg slt_src = result_src;
+               slt_src.negate = ~slt_src.negate;
+               emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+            }
+         }
        } else {
           emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
        }
        break;
  
-   case ir_unop_any:
+   case ir_unop_any: {
        assert(ir->operands[0]->type->is_vector());
-      emit_dp(ir, result_dst, op[0], op[0],
-              ir->operands[0]->type->vector_elements);
-      emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+
+      /* After the dot-product, the value will be an integer on the
+       * range [0,4].  Zero stays zero, and positive values become 1.0.
+       */
+      glsl_to_tgsi_instruction *const dp =
+         emit_dp(ir, result_dst, op[0], op[0],
+                 ir->operands[0]->type->vector_elements);
+      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
+          result_dst.type == GLSL_TYPE_FLOAT) {
+             /* The clamping to [0,1] can be done for free in the fragment
+              * shader with a saturate.
+              */
+             dp->saturate = true;
+      } else if (result_dst.type == GLSL_TYPE_FLOAT) {
+             /* Negating the result of the dot-product gives values on the range
+              * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
+              * is achieved using SLT.
+              */
+             st_src_reg slt_src = result_src;
+             slt_src.negate = ~slt_src.negate;
+             emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+      }
+      else {
+         /* Use SNE 0 if integers are being used as boolean values. */
+         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
+      }
        break;
+   }
  
     case ir_binop_logic_xor:
-      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
+      if (native_integers)
+         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
+      else
+         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
        break;
  
-   case ir_binop_logic_or:
-      /* This could be a saturated add and skip the SNE. */
-      emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
-      emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
+   case ir_binop_logic_or: {
+      if (native_integers) {
+         /* If integers are used as booleans, we can use an actual "or" 
+          * instruction.
+          */
+         assert(native_integers);
+         emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
+      } else {
+         /* After the addition, the value will be an integer on the
+          * range [0,2].  Zero stays zero, and positive values become 1.0.
+          */
+         glsl_to_tgsi_instruction *add =
+            emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
+         if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+            /* The clamping to [0,1] can be done for free in the fragment
+             * shader with a saturate if floats are being used as boolean values.
+             */
+            add->saturate = true;
+         } else {
+            /* Negating the result of the addition gives values on the range
+             * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
+             * is achieved using SLT.
+             */
+            st_src_reg slt_src = result_src;
+            slt_src.negate = ~slt_src.negate;
+            emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+         }
+      }
        break;
+   }
  
     case ir_binop_logic_and:
-      /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
-      emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
+      /* If native integers are disabled, the bool args are stored as float 0.0
+       * or 1.0, so "mul" gives us "and".  If they're enabled, just use the
+       * actual AND opcode.
+       */
+      if (native_integers)
+         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
+      else
+         emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
        break;
  
     case ir_binop_dot:
@@ -1429,25 +1742,50 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
        emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
        break;
     case ir_unop_i2f:
-   case ir_unop_b2f:
-      if (glsl_version >= 130) {
+      if (native_integers) {
           emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
           break;
        }
-   case ir_unop_b2i:
-      /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */
+      /* fallthrough to next case otherwise */
+   case ir_unop_b2f:
+      if (native_integers) {
+         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
+         break;
+      }
+      /* fallthrough to next case otherwise */
+   case ir_unop_i2u:
+   case ir_unop_u2i:
+      /* Converting between signed and unsigned integers is a no-op. */
        result_src = op[0];
        break;
+   case ir_unop_b2i:
+      if (native_integers) {
+         /* Booleans are stored as integers using ~0 for true and 0 for false.
+          * GLSL requires that int(bool) return 1 for true and 0 for false.
+          * This conversion is done with AND, but it could be done with NEG.
+          */
+         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
+      } else {
+         /* Booleans and integers are both stored as floats when native 
+          * integers are disabled.
+          */
+         result_src = op[0];
+      }
+      break;
     case ir_unop_f2i:
-      if (glsl_version >= 130)
+      if (native_integers)
           emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
        else
           emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
        break;
     case ir_unop_f2b:
+      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
+      break;
     case ir_unop_i2b:
-      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], 
-            st_src_reg_for_type(result_dst.type, 0));
+      if (native_integers)
+         emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
+      else
+         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
        break;
     case ir_unop_trunc:
        emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
@@ -1475,37 +1813,37 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
        break;
  
     case ir_unop_bit_not:
-      if (glsl_version >= 130) {
+      if (native_integers) {
           emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
           break;
        }
     case ir_unop_u2f:
-      if (glsl_version >= 130) {
+      if (native_integers) {
           emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
           break;
        }
     case ir_binop_lshift:
-      if (glsl_version >= 130) {
+      if (native_integers) {
           emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]);
           break;
        }
     case ir_binop_rshift:
-      if (glsl_version >= 130) {
+      if (native_integers) {
           emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]);
           break;
        }
     case ir_binop_bit_and:
-      if (glsl_version >= 130) {
+      if (native_integers) {
           emit(ir, TGSI_OPCODE_AND, result_dst, op[0]);
           break;
        }
     case ir_binop_bit_xor:
-      if (glsl_version >= 130) {
+      if (native_integers) {
           emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]);
           break;
        }
     case ir_binop_bit_or:
-      if (glsl_version >= 130) {
+      if (native_integers) {
           emit(ir, TGSI_OPCODE_OR, result_dst, op[0]);
           break;
        }
@@ -1595,14 +1933,6 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
           entry = new(mem_ctx) variable_storage(var,
                                                 PROGRAM_INPUT,
                                                 var->location);
-         if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
-             var->location >= VERT_ATTRIB_GENERIC0) {
-            _mesa_add_attribute(this->prog->Attributes,
-                                var->name,
-                                _mesa_sizeof_glsl_type(var->type->gl_type),
-                                var->type->gl_type,
-                                var->location - VERT_ATTRIB_GENERIC0);
-         }
           break;
        case ir_var_out:
           assert(var->location != -1);
@@ -1632,7 +1962,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
     }
  
     this->result = st_src_reg(entry->file, entry->index, var->type);
-   if (glsl_version <= 120)
+   if (!native_integers)
        this->result.type = GLSL_TYPE_FLOAT;
  }
  
@@ -1651,9 +1981,8 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
     if (index) {
        src.index += index->value.i[0] * element_size;
     } else {
-      st_src_reg array_base = this->result;
        /* Variable index array dereference.  It eats the "vec4" of the
-       * base of the array and an index that offsets the Mesa register
+       * base of the array and an index that offsets the TGSI register
         * index.
         */
        ir->array_index->accept(this);
@@ -1663,10 +1992,24 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
        if (element_size == 1) {
           index_reg = this->result;
        } else {
-         index_reg = get_temp(glsl_type::float_type);
+         index_reg = get_temp(native_integers ?
+                              glsl_type::int_type : glsl_type::float_type);
  
           emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
-              this->result, st_src_reg_for_float(element_size));
+              this->result, st_src_reg_for_type(index_reg.type, element_size));
+      }
+
+      /* If there was already a relative address register involved, add the
+       * new and the old together to get the new offset.
+       */
+      if (src.reladdr != NULL) {
+         st_src_reg accum_reg = get_temp(native_integers ?
+                                glsl_type::int_type : glsl_type::float_type);
+
+         emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
+              index_reg, *src.reladdr);
+
+         index_reg = accum_reg;
        }
  
        src.reladdr = ralloc(mem_ctx, st_src_reg);
@@ -1837,7 +2180,8 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
     if (ir->write_mask == 0) {
        assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
        l.writemask = WRITEMASK_XYZW;
-   } else if (ir->lhs->type->is_scalar()) {
+   } else if (ir->lhs->type->is_scalar() &&
+              ir->lhs->variable_referenced()->mode == ir_var_out) {
        /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
         * FINISHME: W component of fragment shader output zero, work correctly.
         */
@@ -1847,7 +2191,6 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
        int first_enabled_chan = 0;
        int rhs_chan = 0;
  
-      assert(ir->lhs->type->is_vector());
        l.writemask = ir->write_mask;
  
        for (int i = 0; i < 4; i++) {
@@ -1860,7 +2203,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
        /* Swizzle a small RHS vector into the channels being written.
         *
         * glsl ir treats write_mask as dictating how many channels are
-       * present on the RHS while Mesa IR treats write_mask as just
+       * present on the RHS while TGSI treats write_mask as just
         * showing which channels of the vec4 RHS get written.
         */
        for (int i = 0; i < 4; i++) {
@@ -1881,15 +2224,46 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
        st_src_reg condition = this->result;
  
        for (i = 0; i < type_size(ir->lhs->type); i++) {
+         st_src_reg l_src = st_src_reg(l);
+         st_src_reg condition_temp = condition;
+         l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements);
+         
+         if (native_integers) {
+            /* This is necessary because TGSI's CMP instruction expects the
+             * condition to be a float, and we store booleans as integers.
+             * If TGSI had a UCMP instruction or similar, this extra
+             * instruction would not be necessary.
+             */
+            condition_temp = get_temp(glsl_type::vec4_type);
+            condition.negate = 0;
+            emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition);
+            condition_temp.swizzle = condition.swizzle;
+         }
+         
           if (switch_order) {
-            emit(ir, TGSI_OPCODE_CMP, l, condition, st_src_reg(l), r);
+            emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r);
           } else {
-            emit(ir, TGSI_OPCODE_CMP, l, condition, r, st_src_reg(l));
+            emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src);
           }
  
           l.index++;
           r.index++;
        }
+   } else if (ir->rhs->as_expression() &&
+              this->instructions.get_tail() &&
+              ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
+              type_size(ir->lhs->type) == 1 &&
+              l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) {
+      /* To avoid emitting an extra MOV when assigning an expression to a 
+       * variable, emit the last instruction of the expression again, but
+       * replace the destination register with the target of the assignment.
+       * Dead code elimination will remove the original instruction.
+       */
+      glsl_to_tgsi_instruction *inst, *new_inst;
+      inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
+      new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
+      new_inst->saturate = inst->saturate;
+      inst->dead_mask = inst->dst.writemask;
     } else {
        for (i = 0; i < type_size(ir->lhs->type); i++) {
           emit(ir, TGSI_OPCODE_MOV, l, r);
@@ -1908,9 +2282,11 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
     gl_constant_value *values = (gl_constant_value *) stack_vals;
     GLenum gl_type = GL_NONE;
     unsigned int i;
+   static int in_array = 0;
+   gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
  
     /* Unfortunately, 4 floats is all we can get into
-    * _mesa_add_unnamed_constant.  So, make a temp to store an
+    * _mesa_add_typed_unnamed_constant.  So, make a temp to store an
      * aggregate constant and move each constant value into it.  If we
      * get lucky, copy propagation will eliminate the extra moves.
      */
@@ -1944,6 +2320,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
        int size = type_size(ir->type->fields.array);
  
        assert(size > 0);
+      in_array++;
  
        for (i = 0; i < ir->type->length; i++) {
           ir->array_elements[i]->accept(this);
@@ -1956,6 +2333,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
           }
        }
        this->result = temp_base;
+      in_array--;
        return;
     }
  
@@ -1967,12 +2345,12 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
           assert(ir->type->base_type == GLSL_TYPE_FLOAT);
           values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
  
-         src = st_src_reg(PROGRAM_CONSTANT, -1, ir->type->base_type);
-         src.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
-                                                      values,
-                                                      ir->type->vector_elements,
-                                                      GL_FLOAT,
-                                                      &src.swizzle);
+         src = st_src_reg(file, -1, ir->type->base_type);
+         src.index = add_constant(file,
+                                  values,
+                                  ir->type->vector_elements,
+                                  GL_FLOAT,
+                                  &src.swizzle);
           emit(ir, TGSI_OPCODE_MOV, mat_column, src);
  
           mat_column.index++;
@@ -1982,7 +2360,6 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
        return;
     }
  
-   src.file = PROGRAM_CONSTANT;
     switch (ir->type->base_type) {
     case GLSL_TYPE_FLOAT:
        gl_type = GL_FLOAT;
@@ -1991,27 +2368,27 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
        }
        break;
     case GLSL_TYPE_UINT:
-      gl_type = glsl_version >= 130 ? GL_UNSIGNED_INT : GL_FLOAT;
+      gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
        for (i = 0; i < ir->type->vector_elements; i++) {
-         if (glsl_version >= 130)
+         if (native_integers)
              values[i].u = ir->value.u[i];
           else
              values[i].f = ir->value.u[i];
        }
        break;
     case GLSL_TYPE_INT:
-      gl_type = glsl_version >= 130 ? GL_INT : GL_FLOAT;
+      gl_type = native_integers ? GL_INT : GL_FLOAT;
        for (i = 0; i < ir->type->vector_elements; i++) {
-         if (glsl_version >= 130)
+         if (native_integers)
              values[i].i = ir->value.i[i];
           else
              values[i].f = ir->value.i[i];
        }
        break;
     case GLSL_TYPE_BOOL:
-      gl_type = glsl_version >= 130 ? GL_BOOL : GL_FLOAT;
+      gl_type = native_integers ? GL_BOOL : GL_FLOAT;
        for (i = 0; i < ir->type->vector_elements; i++) {
-         if (glsl_version >= 130)
+         if (native_integers)
              values[i].b = ir->value.b[i];
           else
              values[i].f = ir->value.b[i];
@@ -2021,10 +2398,12 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
        assert(!"Non-float/uint/int/bool constant");
     }
  
-   this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type);
-   this->result.index = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
-                                                  values, ir->type->vector_elements, gl_type,
-                                                  &this->result.swizzle);
+   this->result = st_src_reg(file, -1, ir->type);
+   this->result.index = add_constant(file,
+                                     values,
+                                     ir->type->vector_elements,
+                                     gl_type,
+                                     &this->result.swizzle);
  }
  
  function_entry *
@@ -2152,21 +2531,23 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
  void
  glsl_to_tgsi_visitor::visit(ir_texture *ir)
  {
-   st_src_reg result_src, coord, lod_info, projector, dx, dy;
+   st_src_reg result_src, coord, lod_info, projector, dx, dy, offset;
     st_dst_reg result_dst, coord_dst;
     glsl_to_tgsi_instruction *inst = NULL;
     unsigned opcode = TGSI_OPCODE_NOP;
  
-   ir->coordinate->accept(this);
+   if (ir->coordinate) {
+      ir->coordinate->accept(this);
  
-   /* Put our coords in a temp.  We'll need to modify them for shadow,
-    * projection, or LOD, so the only case we'd use it as is is if
-    * we're doing plain old texturing.  Mesa IR optimization should
-    * handle cleaning up our mess in that case.
-    */
-   coord = get_temp(glsl_type::vec4_type);
-   coord_dst = st_dst_reg(coord);
-   emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
+      /* Put our coords in a temp.  We'll need to modify them for shadow,
+       * projection, or LOD, so the only case we'd use it as is is if
+       * we're doing plain old texturing.  The optimization passes on
+       * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
+       */
+      coord = get_temp(glsl_type::vec4_type);
+      coord_dst = st_dst_reg(coord);
+      emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
+   }
  
     if (ir->projector) {
        ir->projector->accept(this);
@@ -2200,11 +2581,24 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
        ir->lod_info.grad.dPdy->accept(this);
        dy = this->result;
        break;
-   case ir_txf: /* TODO: use TGSI_OPCODE_TXF here */
-      assert(!"GLSL 1.30 features unsupported");
+   case ir_txs:
+      opcode = TGSI_OPCODE_TXQ;
+      ir->lod_info.lod->accept(this);
+      lod_info = this->result;
+      break;
+   case ir_txf:
+      opcode = TGSI_OPCODE_TXF;
+      ir->lod_info.lod->accept(this);
+      lod_info = this->result;
+      if (ir->offset) {
+        ir->offset->accept(this);
+        offset = this->result;
+      }
        break;
     }
  
+   const glsl_type *sampler_type = ir->sampler->type;
+
     if (ir->projector) {
        if (opcode == TGSI_OPCODE_TEX) {
           /* Slot the projector in as the last component of the coord. */
@@ -2236,6 +2630,9 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
              tmp_src = get_temp(glsl_type::vec4_type);
              st_dst_reg tmp_dst = st_dst_reg(tmp_src);
  
+           /* Projective division not allowed for array samplers. */
+           assert(!sampler_type->sampler_array);
+
              tmp_dst.writemask = WRITEMASK_Z;
              emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
  
@@ -2260,12 +2657,21 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
         * coord.
         */
        ir->shadow_comparitor->accept(this);
-      coord_dst.writemask = WRITEMASK_Z;
+
+      /* XXX This will need to be updated for cubemap array samplers. */
+      if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
+          sampler_type->sampler_array) {
+         coord_dst.writemask = WRITEMASK_W;
+      } else {
+         coord_dst.writemask = WRITEMASK_Z;
+      }
+
        emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
        coord_dst.writemask = WRITEMASK_XYZW;
     }
  
-   if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB) {
+   if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
+       opcode == TGSI_OPCODE_TXF) {
        /* TGSI stores LOD or LOD bias in the last channel of the coords. */
        coord_dst.writemask = WRITEMASK_W;
        emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
@@ -2274,7 +2680,11 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
  
     if (opcode == TGSI_OPCODE_TXD)
        inst = emit(ir, opcode, result_dst, coord, dx, dy);
-   else
+   else if (opcode == TGSI_OPCODE_TXQ)
+      inst = emit(ir, opcode, result_dst, lod_info);
+   else if (opcode == TGSI_OPCODE_TXF) {
+      inst = emit(ir, opcode, result_dst, coord);
+   } else
        inst = emit(ir, opcode, result_dst, coord);
  
     if (ir->shadow_comparitor)
@@ -2284,7 +2694,14 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
                                                    this->shader_program,
                                                    this->prog);
  
-   const glsl_type *sampler_type = ir->sampler->type;
+   if (ir->offset) {
+       inst->tex_offset_num_offset = 1;
+       inst->tex_offsets[0].Index = offset.index;
+       inst->tex_offsets[0].File = offset.file;
+       inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0);
+       inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1);
+       inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2);
+   }
  
     switch (sampler_type->sampler_dimensionality) {
     case GLSL_SAMPLER_DIM_1D:
@@ -2357,7 +2774,7 @@ glsl_to_tgsi_visitor::visit(ir_discard *ir)
  void
  glsl_to_tgsi_visitor::visit(ir_if *ir)
  {
-   glsl_to_tgsi_instruction *cond_inst, *if_inst, *else_inst = NULL;
+   glsl_to_tgsi_instruction *cond_inst, *if_inst;
     glsl_to_tgsi_instruction *prev_inst;
  
     prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
@@ -2389,7 +2806,7 @@ glsl_to_tgsi_visitor::visit(ir_if *ir)
     visit_exec_list(&ir->then_instructions, this);
  
     if (!ir->else_instructions.is_empty()) {
-      else_inst = emit(ir->condition, TGSI_OPCODE_ELSE);
+      emit(ir->condition, TGSI_OPCODE_ELSE);
        visit_exec_list(&ir->else_instructions, this);
     }
  
@@ -2401,6 +2818,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
     result.file = PROGRAM_UNDEFINED;
     next_temp = 1;
     next_signature_id = 1;
+   num_immediates = 0;
     current_function = NULL;
     num_address_regs = 0;
     indirect_addr_temps = false;
@@ -2681,36 +3099,6 @@ set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
     }
  }
  
-static void
-set_uniform_initializers(struct gl_context *ctx,
-                        struct gl_shader_program *shader_program)
-{
-   void *mem_ctx = NULL;
-
-   for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) {
-      struct gl_shader *shader = shader_program->_LinkedShaders[i];
-
-      if (shader == NULL)
-         continue;
-
-      foreach_iter(exec_list_iterator, iter, *shader->ir) {
-         ir_instruction *ir = (ir_instruction *)iter.get();
-         ir_variable *var = ir->as_variable();
-
-         if (!var || var->mode != ir_var_uniform || !var->constant_value)
-            continue;
-
-         if (!mem_ctx)
-            mem_ctx = ralloc_context(NULL);
-
-         set_uniform_initializer(ctx, mem_ctx, shader_program, var->name,
-                                var->type, var->constant_value);
-      }
-   }
-
-   ralloc_free(mem_ctx);
-}
-
  /*
   * Scan/rewrite program to remove reads of custom (output) registers.
   * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING
@@ -2728,11 +3116,11 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
     GLint outputMap[VERT_RESULT_MAX];
     GLint outputTypes[VERT_RESULT_MAX];
     GLuint numVaryingReads = 0;
-   GLboolean usedTemps[MAX_PROGRAM_TEMPS];
+   GLboolean usedTemps[MAX_TEMPS];
     GLuint firstTemp = 0;
  
     _mesa_find_used_registers(prog, PROGRAM_TEMPORARY,
-                             usedTemps, MAX_PROGRAM_TEMPS);
+                             usedTemps, MAX_TEMPS);
  
     assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT);
     assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING);
@@ -2752,7 +3140,7 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
              if (outputMap[var] == -1) {
                 numVaryingReads++;
                 outputMap[var] = _mesa_find_free_register(usedTemps,
-                                                         MAX_PROGRAM_TEMPS,
+                                                         MAX_TEMPS,
                                                           firstTemp);
                 outputTypes[var] = inst->src[j].type;
                 firstTemp = outputMap[var] + 1;
@@ -2788,6 +3176,97 @@ glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
     }
  }
  
+/**
+ * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
+ * are read from the given src in this instruction.
+ *
+ * A source channel is reported as read only when at least one destination
+ * channel enabled in dst.writemask selects it through src.swizzle.
+ *
+ * \param dst  destination register; only its writemask is consulted
+ * \param src  source register; only its swizzle is consulted
+ * \return bitmask in which bit N is set when source channel N is read
+ */
+static int
+get_src_arg_mask(st_dst_reg dst, st_src_reg src)
+{
+   int read_mask = 0, comp;
+
+   /* Now, given the src swizzle and the written channels, find which
+    * components are actually read: for every destination channel enabled
+    * in the writemask, the swizzle names the source channel feeding it,
+    * and that source channel's bit is set in read_mask.
+    *
+    * NOTE(review): the coord <= SWIZZLE_W test presumably expresses the
+    * same bound as the ASSERT (i.e. SWIZZLE_W == 3) - confirm.
+    */
+   for (comp = 0; comp < 4; ++comp) {
+      const unsigned coord = GET_SWZ(src.swizzle, comp);
+      ASSERT(coord < 4);
+      if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W)
+         read_mask |= 1 << coord;
+   }
+
+   return read_mask;
+}
+
+/**
+ * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
+ * instruction is the first instruction to write to register T0.  There are
+ * several lowering passes done in GLSL IR (e.g. branches and
+ * relative addressing) that create a large number of conditional assignments
+ * that glsl_to_tgsi_visitor converts to CMP instructions like the one
+ * mentioned above.
+ *
+ * Here is why this conversion is safe:
+ * CMP T0, T1 T2 T0 can be expanded to:
+ * if (T1 < 0.0)
+ *     MOV T0, T2;
+ * else
+ *     MOV T0, T0;
+ *
+ * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
+ * as the original program.  If (T1 < 0.0) evaluates to false, executing
+ * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
+ * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
+ * because any instruction that was going to read from T0 after this was going
+ * to read a garbage value anyway.
+ *
+ * The pass walks the instruction list once, accumulating the writemasks seen
+ * so far for each PROGRAM_OUTPUT and PROGRAM_TEMPORARY destination index so
+ * that "first write" can be decided per channel.  It stops at the first
+ * instruction that has a relatively addressed destination or that is one of
+ * the flow control opcodes listed below; instructions after that point are
+ * left untouched.
+ */
+void
+glsl_to_tgsi_visitor::simplify_cmp(void)
+{
+   unsigned tempWrites[MAX_TEMPS];
+   unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
+
+   memset(tempWrites, 0, sizeof(tempWrites));
+   memset(outputWrites, 0, sizeof(outputWrites));
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      unsigned prevWriteMask = 0;
+
+      /* Give up if we encounter relative addressing or flow control.
+       * This returns from the whole pass rather than skipping the
+       * instruction, so nothing after this point is examined.
+       */
+      if (inst->dst.reladdr ||
+          tgsi_get_opcode_info(inst->op)->is_branch ||
+          inst->op == TGSI_OPCODE_BGNSUB ||
+          inst->op == TGSI_OPCODE_CONT ||
+          inst->op == TGSI_OPCODE_END ||
+          inst->op == TGSI_OPCODE_ENDSUB ||
+          inst->op == TGSI_OPCODE_RET) {
+         return;
+      }
+
+      if (inst->dst.file == PROGRAM_OUTPUT) {
+         assert(inst->dst.index < MAX_PROGRAM_OUTPUTS);
+         prevWriteMask = outputWrites[inst->dst.index];
+         outputWrites[inst->dst.index] |= inst->dst.writemask;
+      } else if (inst->dst.file == PROGRAM_TEMPORARY) {
+         assert(inst->dst.index < MAX_TEMPS);
+         prevWriteMask = tempWrites[inst->dst.index];
+         tempWrites[inst->dst.index] |= inst->dst.writemask;
+      }
+
+      /* For a CMP to be considered a conditional write, the destination
+       * register and source register two must be the same.  In addition,
+       * none of the channels being written may have been written by an
+       * earlier instruction (prevWriteMask holds the mask from before this
+       * instruction was accounted for), and the channels of src[2] that
+       * get_src_arg_mask() reports as read must be exactly the channels
+       * being written.  When all of that holds, the instruction becomes a
+       * MOV whose single source is the old src[1].
+       */
+      if (inst->op == TGSI_OPCODE_CMP
+          && !(inst->dst.writemask & prevWriteMask)
+          && inst->src[2].file == inst->dst.file
+          && inst->src[2].index == inst->dst.index
+          && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) {
+
+         inst->op = TGSI_OPCODE_MOV;
+         inst->src[0] = inst->src[1];
+      }
+   }
+}
+
  /* Replaces all references to a temporary register index with another index. */
  void
  glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
@@ -3162,6 +3641,151 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
     }
  }
  
+/*
+ * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
+ * code elimination.  This is less primitive than eliminate_dead_code(), as it
+ * is per-channel and can detect consecutive writes without a read between them
+ * as dead code.  However, there is some dead code that can be eliminated by
+ * eliminate_dead_code() but not this function - for example, this function
+ * cannot eliminate an instruction writing to a register that is never read and
+ * is the only instruction writing to that register.
+ *
+ * NOTE(review): the final sweep below flags every write still pending at the
+ * end of the instruction list as dead, which looks like it covers the case
+ * described above - confirm whether that limitation still applies.
+ *
+ * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
+ * will occur.
+ *
+ * Bookkeeping: writes[4 * r + c] holds the instruction whose write to channel
+ * c of temporary r has not been read yet (NULL if there is none), and
+ * write_level[4 * r + c] holds the IF nesting level at which that write was
+ * recorded.  Dead channels are accumulated in each instruction's dead_mask
+ * and applied in a second walk over the instruction list.
+ *
+ * Returns the number of instructions that were removed entirely.
+ */
+int
+glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
+{
+   glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
+                                                     glsl_to_tgsi_instruction *,
+                                                     this->next_temp * 4);
+   int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
+   int level = 0;
+   int removed = 0;
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+
+      assert(inst->dst.file != PROGRAM_TEMPORARY
+             || inst->dst.index < this->next_temp);
+      
+      switch (inst->op) {
+      case TGSI_OPCODE_BGNLOOP:
+      case TGSI_OPCODE_ENDLOOP:
+         /* End of a basic block, clear the write array entirely.
+          * FIXME: This keeps us from killing dead code when the writes are
+          * on either side of a loop, even when the register isn't touched
+          * inside the loop.
+          */
+         memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
+         break;
+
+      case TGSI_OPCODE_ENDIF:
+         --level;
+         break;
+
+      case TGSI_OPCODE_ELSE:
+         /* Clear all channels written inside the preceding if block from the
+          * write array, but leave those that were not touched.  A channel
+          * counts as written inside the if block when its recorded
+          * write_level is at least the current nesting level.
+          *
+          * FIXME: This destroys opportunities to remove dead code inside of
+          * IF blocks that are followed by an ELSE block.
+          */
+         for (int r = 0; r < this->next_temp; r++) {
+            for (int c = 0; c < 4; c++) {
+               if (!writes[4 * r + c])
+                        continue;
+
+               if (write_level[4 * r + c] >= level)
+                        writes[4 * r + c] = NULL;
+            }
+         }
+         break;
+
+      case TGSI_OPCODE_IF:
+         ++level;
+         /* fallthrough to default case to mark the condition as read */
+      
+      default:
+         /* Continuing the block, clear any channels from the write array that
+          * are read by this instruction.  Every source slot of the
+          * instruction is inspected.
+          */
+         for (unsigned i = 0; i < Elements(inst->src); i++) {
+            if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
+               /* Any temporary might be read, so no dead code elimination
+                * across this instruction.
+                */
+               memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
+            } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
+               /* Clear where it's used as src.  src_chans collects one bit
+                * per channel selected by any of the four swizzle slots.
+                */
+               int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
+               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
+               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
+               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);
+               
+               for (int c = 0; c < 4; c++) {
+                  if (src_chans & (1 << c)) {
+                     writes[4 * inst->src[i].index + c] = NULL;
+                  }
+               }
+            }
+         }
+         break;
+      }
+
+      /* If this instruction writes to a temporary, add it to the write array.
+       * If there is already an instruction in the write array for one or more
+       * of the channels, flag that channel write as dead.
+       *
+       * Exception: when the pending write was recorded at a shallower IF
+       * nesting level than the current one, it is neither flagged nor
+       * replaced, and the current write is not recorded for that channel.
+       *
+       * Instructions with a relatively addressed destination or with
+       * saturate set are never entered into the write array.
+       */
+      if (inst->dst.file == PROGRAM_TEMPORARY &&
+          !inst->dst.reladdr &&
+          !inst->saturate) {
+         for (int c = 0; c < 4; c++) {
+            if (inst->dst.writemask & (1 << c)) {
+               if (writes[4 * inst->dst.index + c]) {
+                  if (write_level[4 * inst->dst.index + c] < level)
+                     continue;
+                  else
+                     writes[4 * inst->dst.index + c]->dead_mask |= (1 << c);
+               }
+               writes[4 * inst->dst.index + c] = inst;
+               write_level[4 * inst->dst.index + c] = level;
+            }
+         }
+      }
+   }
+
+   /* Anything still in the write array at this point is dead code. */
+   for (int r = 0; r < this->next_temp; r++) {
+      for (int c = 0; c < 4; c++) {
+         glsl_to_tgsi_instruction *inst = writes[4 * r + c];
+         if (inst)
+            inst->dead_mask |= (1 << c);
+      }
+   }
+
+   /* Now actually remove the instructions that are completely dead and update
+    * the writemask of other instructions with dead channels.  Instructions
+    * with an empty dead_mask or an empty writemask are left alone; only
+    * full removals are counted in the return value.
+    */
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      
+      if (!inst->dead_mask || !inst->dst.writemask)
+         continue;
+      else if (inst->dead_mask == inst->dst.writemask) {
+         iter.remove();
+         delete inst;
+         removed++;
+      } else
+         inst->dst.writemask &= ~(inst->dead_mask);
+   }
+
+   ralloc_free(write_level);
+   ralloc_free(writes);
+   
+   return removed;
+}
+
  /* Merges temporary registers together where possible to reduce the number of 
   * registers needed to run a program.
   * 
@@ -3233,6 +3857,205 @@ glsl_to_tgsi_visitor::renumber_registers(void)
     this->next_temp = new_index;
  }
  
+/**
+ * Returns a fragment program which implements the current pixel transfer ops.
+ * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
+ */
+extern "C" void
+get_pixel_transfer_visitor(struct st_fragment_program *fp,
+                           glsl_to_tgsi_visitor *original,
+                           int scale_and_bias, int pixel_maps)
+{
+   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
+   struct st_context *st = st_context(original->ctx);
+   struct gl_program *prog = &fp->Base.Base;
+   struct gl_program_parameter_list *params = _mesa_new_parameter_list();
+   st_src_reg coord, src0;
+   st_dst_reg dst0;
+   glsl_to_tgsi_instruction *inst;
+
+   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
+   v->ctx = original->ctx;
+   v->prog = prog;
+   v->glsl_version = original->glsl_version;
+   v->native_integers = original->native_integers;
+   v->options = original->options;
+   v->next_temp = original->next_temp;
+   v->num_address_regs = original->num_address_regs;
+   v->samplers_used = prog->SamplersUsed = original->samplers_used;
+   v->indirect_addr_temps = original->indirect_addr_temps;
+   v->indirect_addr_consts = original->indirect_addr_consts;
+   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
+
+   /*
+    * Get initial pixel color from the texture.
+    * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
+    */
+   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
+   src0 = v->get_temp(glsl_type::vec4_type);
+   dst0 = st_dst_reg(src0);
+   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
+   inst->sampler = 0;
+   inst->tex_target = TEXTURE_2D_INDEX;
+
+   prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
+   prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
+   v->samplers_used |= (1 << 0);
+
+   if (scale_and_bias) {
+      static const gl_state_index scale_state[STATE_LENGTH] =
+         { STATE_INTERNAL, STATE_PT_SCALE,
+           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
+      static const gl_state_index bias_state[STATE_LENGTH] =
+         { STATE_INTERNAL, STATE_PT_BIAS,
+           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
+      GLint scale_p, bias_p;
+      st_src_reg scale, bias;
+
+      scale_p = _mesa_add_state_reference(params, scale_state);
+      bias_p = _mesa_add_state_reference(params, bias_state);
+
+      /* MAD colorTemp, colorTemp, scale, bias; */
+      scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
+      bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
+      inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
+   }
+
+   if (pixel_maps) {
+      st_src_reg temp = v->get_temp(glsl_type::vec4_type);
+      st_dst_reg temp_dst = st_dst_reg(temp);
+
+      assert(st->pixel_xfer.pixelmap_texture);
+
+      /* With a little effort, we can do four pixel map look-ups with
+       * two TEX instructions:
+       */
+
+      /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
+      temp_dst.writemask = WRITEMASK_XY; /* write R,G */
+      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
+      inst->sampler = 1;
+      inst->tex_target = TEXTURE_2D_INDEX;
+
+      /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
+      src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
+      temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
+      inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
+      inst->sampler = 1;
+      inst->tex_target = TEXTURE_2D_INDEX;
+
+      prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
+      v->samplers_used |= (1 << 1);
+
+      /* MOV colorTemp, temp; */
+      inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp);
+   }
+
+   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
+    * new visitor. */
+   foreach_iter(exec_list_iterator, iter, original->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      st_src_reg src_regs[3];
+
+      if (inst->dst.file == PROGRAM_OUTPUT)
+         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
+
+      for (int i=0; i<3; i++) {
+         src_regs[i] = inst->src[i];
+         if (src_regs[i].file == PROGRAM_INPUT &&
+             src_regs[i].index == FRAG_ATTRIB_COL0)
+         {
+            src_regs[i].file = PROGRAM_TEMPORARY;
+            src_regs[i].index = src0.index;
+         }
+         else if (src_regs[i].file == PROGRAM_INPUT)
+            prog->InputsRead |= (1 << src_regs[i].index);
+      }
+
+      v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
+   }
+
+   /* Make modifications to fragment program info. */
+   prog->Parameters = _mesa_combine_parameter_lists(params,
+                                                    original->prog->Parameters);
+   _mesa_free_parameter_list(params);
+   count_resources(v, prog);
+   fp->glsl_to_tgsi = v;
+}
+
+/**
+ * Make fragment program for glBitmap:
+ *   Sample the texture and kill the fragment if the bit is 0.
+ * This program will be combined with the user's fragment program.
+ *
+ * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
+ */
+extern "C" void
+get_bitmap_visitor(struct st_fragment_program *fp,
+                   glsl_to_tgsi_visitor *original, int samplerIndex)
+{
+   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
+   struct st_context *st = st_context(original->ctx);
+   struct gl_program *prog = &fp->Base.Base;
+   st_src_reg coord, src0;
+   st_dst_reg dst0;
+   glsl_to_tgsi_instruction *inst;
+
+   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
+   v->ctx = original->ctx;
+   v->prog = prog;
+   v->glsl_version = original->glsl_version;
+   v->native_integers = original->native_integers;
+   v->options = original->options;
+   v->next_temp = original->next_temp;
+   v->num_address_regs = original->num_address_regs;
+   v->samplers_used = prog->SamplersUsed = original->samplers_used;
+   v->indirect_addr_temps = original->indirect_addr_temps;
+   v->indirect_addr_consts = original->indirect_addr_consts;
+   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
+
+   /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
+   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
+   src0 = v->get_temp(glsl_type::vec4_type);
+   dst0 = st_dst_reg(src0);
+   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
+   inst->sampler = samplerIndex;
+   inst->tex_target = TEXTURE_2D_INDEX;
+
+   prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
+   prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
+   v->samplers_used |= (1 << samplerIndex);
+
+   /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */
+   src0.negate = NEGATE_XYZW;
+   if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
+      src0.swizzle = SWIZZLE_XXXX;
+   inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0);
+
+   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
+    * new visitor. */
+   foreach_iter(exec_list_iterator, iter, original->instructions) {
+      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
+      st_src_reg src_regs[3];
+
+      if (inst->dst.file == PROGRAM_OUTPUT)
+         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
+
+      for (int i=0; i<3; i++) {
+         src_regs[i] = inst->src[i];
+         if (src_regs[i].file == PROGRAM_INPUT)
+            prog->InputsRead |= (1 << src_regs[i].index);
+      }
+
+      v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
+   }
+
+   /* Make modifications to fragment program info. */
+   prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
+   count_resources(v, prog);
+   fp->glsl_to_tgsi = v;
+}
+
  /* ------------------------- TGSI conversion stuff -------------------------- */
  struct label {
     unsigned branch_target;
@@ -3245,8 +4068,9 @@ struct label {
  struct st_translate {
     struct ureg_program *ureg;
  
-   struct ureg_dst temps[MAX_PROGRAM_TEMPS];
+   struct ureg_dst temps[MAX_TEMPS];
     struct ureg_src *constants;
+   struct ureg_src *immediates;
     struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
     struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
     struct ureg_dst address[1];
@@ -3295,15 +4119,14 @@ static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
   * of labels built here and patch the TGSI code with the actual
   * location of each label.
   */
-static unsigned *get_label( struct st_translate *t,
-                            unsigned branch_target )
+static unsigned *get_label(struct st_translate *t, unsigned branch_target)
  {
     unsigned i;
  
     if (t->labels_count + 1 >= t->labels_size) {
        t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
        t->labels = (struct label *)realloc(t->labels, 
-                                          t->labels_size * sizeof t->labels[0]);
+                                          t->labels_size * sizeof(struct label));
        if (t->labels == NULL) {
           static unsigned dummy;
           t->error = TRUE;
@@ -3317,17 +4140,16 @@ static unsigned *get_label( struct st_translate *t,
  }
  
  /**
- * Called prior to emitting the TGSI code for each Mesa instruction.
+ * Called prior to emitting the TGSI code for each instruction.
   * Allocate additional space for instructions if needed.
- * Update the insn[] array so the next Mesa instruction points to
+ * Update the insn[] array so the next glsl_to_tgsi_instruction points to
   * the next TGSI instruction.
   */
-static void set_insn_start( struct st_translate *t,
-                            unsigned start )
+static void set_insn_start(struct st_translate *t, unsigned start)
  {
     if (t->insn_count + 1 >= t->insn_size) {
        t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
-      t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof t->insn[0]);
+      t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0]));
        if (t->insn == NULL) {
           t->error = TRUE;
           return;
@@ -3338,20 +4160,45 @@ static void set_insn_start( struct st_translate *t,
  }
  
  /**
- * Map a Mesa dst register to a TGSI ureg_dst register.
+ * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
+ */
+static struct ureg_src
+emit_immediate(struct st_translate *t,
+               gl_constant_value values[4],
+               int type, int size)
+{
+   struct ureg_program *ureg = t->ureg;
+
+   switch(type)
+   {
+   case GL_FLOAT:
+      return ureg_DECL_immediate(ureg, &values[0].f, size);
+   case GL_INT:
+      return ureg_DECL_immediate_int(ureg, &values[0].i, size);
+   case GL_UNSIGNED_INT:
+   case GL_BOOL:
+      return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
+   default:
+      assert(!"should not get here - type must be float, int, uint, or bool");
+      return ureg_src_undef();
+   }
+}
+
+/**
+ * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
   */
  static struct ureg_dst
-dst_register( struct st_translate *t,
-              gl_register_file file,
-              GLuint index )
+dst_register(struct st_translate *t,
+             gl_register_file file,
+             GLuint index)
  {
-   switch( file ) {
+   switch(file) {
     case PROGRAM_UNDEFINED:
        return ureg_dst_undef();
  
     case PROGRAM_TEMPORARY:
        if (ureg_dst_is_undef(t->temps[index]))
-         t->temps[index] = ureg_DECL_temporary( t->ureg );
+         t->temps[index] = ureg_DECL_temporary(t->ureg);
  
        return t->temps[index];
  
@@ -3374,20 +4221,20 @@ dst_register( struct st_translate *t,
        return t->address[index];
  
     default:
-      debug_assert( 0 );
+      assert(!"unknown dst register file");
        return ureg_dst_undef();
     }
  }
  
  /**
- * Map a Mesa src register to a TGSI ureg_src register.
+ * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
   */
  static struct ureg_src
-src_register( struct st_translate *t,
-              gl_register_file file,
-              GLuint index )
+src_register(struct st_translate *t,
+             gl_register_file file,
+             GLuint index)
  {
-   switch( file ) {
+   switch(file) {
     case PROGRAM_UNDEFINED:
        return ureg_src_undef();
  
@@ -3395,7 +4242,7 @@ src_register( struct st_translate *t,
        assert(index >= 0);
        assert(index < Elements(t->temps));
        if (ureg_dst_is_undef(t->temps[index]))
-         t->temps[index] = ureg_DECL_temporary( t->ureg );
+         t->temps[index] = ureg_DECL_temporary(t->ureg);
        return ureg_src(t->temps[index]);
  
     case PROGRAM_NAMED_PARAM:
@@ -3407,10 +4254,13 @@ src_register( struct st_translate *t,
     case PROGRAM_STATE_VAR:
     case PROGRAM_CONSTANT:       /* ie, immediate */
        if (index < 0)
-         return ureg_DECL_constant( t->ureg, 0 );
+         return ureg_DECL_constant(t->ureg, 0);
        else
           return t->constants[index];
  
+   case PROGRAM_IMMEDIATE:
+      return t->immediates[index];
+
     case PROGRAM_INPUT:
        assert(t->inputMapping[index] < Elements(t->inputs));
        return t->inputs[t->inputMapping[index]];
@@ -3427,7 +4277,7 @@ src_register( struct st_translate *t,
        return t->systemValues[index];
  
     default:
-      debug_assert( 0 );
+      assert(!"unknown src register file");
        return ureg_src_undef();
     }
  }
@@ -3436,22 +4286,21 @@ src_register( struct st_translate *t,
   * Create a TGSI ureg_dst register from an st_dst_reg.
   */
  static struct ureg_dst
-translate_dst( struct st_translate *t,
-               const st_dst_reg *dst_reg,
-               boolean saturate )
+translate_dst(struct st_translate *t,
+              const st_dst_reg *dst_reg,
+              bool saturate)
  {
-   struct ureg_dst dst = dst_register( t, 
-                                       dst_reg->file,
-                                       dst_reg->index );
+   struct ureg_dst dst = dst_register(t, 
+                                      dst_reg->file,
+                                      dst_reg->index);
  
-   dst = ureg_writemask( dst, 
-                         dst_reg->writemask );
+   dst = ureg_writemask(dst, dst_reg->writemask);
     
     if (saturate)
-      dst = ureg_saturate( dst );
+      dst = ureg_saturate(dst);
  
     if (dst_reg->reladdr != NULL)
-      dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) );
+      dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
  
     return dst;
  }
@@ -3460,16 +4309,15 @@ translate_dst( struct st_translate *t,
   * Create a TGSI ureg_src register from an st_src_reg.
   */
  static struct ureg_src
-translate_src( struct st_translate *t,
-               const st_src_reg *src_reg )
+translate_src(struct st_translate *t, const st_src_reg *src_reg)
  {
-   struct ureg_src src = src_register( t, src_reg->file, src_reg->index );
+   struct ureg_src src = src_register(t, src_reg->file, src_reg->index);
  
-   src = ureg_swizzle( src,
-                       GET_SWZ( src_reg->swizzle, 0 ) & 0x3,
-                       GET_SWZ( src_reg->swizzle, 1 ) & 0x3,
-                       GET_SWZ( src_reg->swizzle, 2 ) & 0x3,
-                       GET_SWZ( src_reg->swizzle, 3 ) & 0x3);
+   src = ureg_swizzle(src,
+                      GET_SWZ(src_reg->swizzle, 0) & 0x3,
+                      GET_SWZ(src_reg->swizzle, 1) & 0x3,
+                      GET_SWZ(src_reg->swizzle, 2) & 0x3,
+                      GET_SWZ(src_reg->swizzle, 3) & 0x3);
  
     if ((src_reg->negate & 0xf) == NEGATE_XYZW)
        src = ureg_negate(src);
@@ -3500,40 +4348,59 @@ translate_src( struct st_translate *t,
     return src;
  }
  
+/**
+ * Convert a texture offset recorded on a glsl_to_tgsi instruction into the
+ * form passed to ureg_tex_insn.
+ *
+ * \param t          translation state (currently unused here)
+ * \param in_offset  offset whose File must be PROGRAM_IMMEDIATE
+ * \return a copy of \p in_offset with File rewritten to TGSI_FILE_IMMEDIATE
+ *
+ * NOTE(review): Index and the swizzles are copied through unchanged, i.e. the
+ * glsl_to_tgsi immediate index is used as the TGSI immediate index - confirm
+ * this matches the registers handed out when the immediates are declared.
+ */
+static struct tgsi_texture_offset
+translate_tex_offset(struct st_translate *t,
+                     const struct tgsi_texture_offset *in_offset)
+{
+   struct tgsi_texture_offset offset;
+
+   assert(in_offset->File == PROGRAM_IMMEDIATE);
+
+   /* Clear the whole struct first so that any bits not assigned below (for
+    * example padding in the token layout) have a deterministic value in the
+    * copy that is returned by value.
+    */
+   memset(&offset, 0, sizeof(offset));
+
+   offset.File = TGSI_FILE_IMMEDIATE;
+   offset.Index = in_offset->Index;
+   offset.SwizzleX = in_offset->SwizzleX;
+   offset.SwizzleY = in_offset->SwizzleY;
+   offset.SwizzleZ = in_offset->SwizzleZ;
+
+   return offset;
+}
+
  static void
-compile_tgsi_instruction(struct st_translate *t, 
-                                    const struct glsl_to_tgsi_instruction *inst)
+compile_tgsi_instruction(struct st_translate *t,
+                         const glsl_to_tgsi_instruction *inst)
  {
     struct ureg_program *ureg = t->ureg;
     GLuint i;
     struct ureg_dst dst[1];
     struct ureg_src src[4];
+   struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];
+
     unsigned num_dst;
     unsigned num_src;
  
-   num_dst = num_inst_dst_regs( inst->op );
-   num_src = num_inst_src_regs( inst->op );
+   num_dst = num_inst_dst_regs(inst->op);
+   num_src = num_inst_src_regs(inst->op);
  
     if (num_dst) 
-      dst[0] = translate_dst( t, 
-                              &inst->dst,
-                              inst->saturate);
+      dst[0] = translate_dst(t, 
+                             &inst->dst,
+                             inst->saturate);
  
     for (i = 0; i < num_src; i++) 
-      src[i] = translate_src( t, &inst->src[i] );
+      src[i] = translate_src(t, &inst->src[i]);
  
-   switch( inst->op ) {
+   switch(inst->op) {
     case TGSI_OPCODE_BGNLOOP:
     case TGSI_OPCODE_CAL:
     case TGSI_OPCODE_ELSE:
     case TGSI_OPCODE_ENDLOOP:
     case TGSI_OPCODE_IF:
-      debug_assert(num_dst == 0);
-      ureg_label_insn( ureg,
-                       inst->op,
-                       src, num_src,
-                       get_label( t, 
-                                  inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0 ));
+      assert(num_dst == 0);
+      ureg_label_insn(ureg,
+                      inst->op,
+                      src, num_src,
+                      get_label(t, 
+                                inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
        return;
  
     case TGSI_OPCODE_TEX:
@@ -3541,36 +4408,30 @@ compile_tgsi_instruction(struct st_translate *t,
     case TGSI_OPCODE_TXD:
     case TGSI_OPCODE_TXL:
     case TGSI_OPCODE_TXP:
+   case TGSI_OPCODE_TXQ:
+   case TGSI_OPCODE_TXF:
        src[num_src++] = t->samplers[inst->sampler];
-      ureg_tex_insn( ureg,
-                     inst->op,
-                     dst, num_dst, 
-                     translate_texture_target( inst->tex_target,
-                                               inst->tex_shadow ),
-                     src, num_src );
+      for (i = 0; i < inst->tex_offset_num_offset; i++) {
+         texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
+      }
+      ureg_tex_insn(ureg,
+                    inst->op,
+                    dst, num_dst, 
+                    translate_texture_target(inst->tex_target, inst->tex_shadow),
+                    texoffsets, inst->tex_offset_num_offset,
+                    src, num_src);
        return;
  
     case TGSI_OPCODE_SCS:
-      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
-      ureg_insn( ureg, 
-                 inst->op, 
-                 dst, num_dst, 
-                 src, num_src );
-      break;
-
-   case TGSI_OPCODE_XPD:
-      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
-      ureg_insn( ureg, 
-                 inst->op, 
-                 dst, num_dst, 
-                 src, num_src );
+      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
+      ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
        break;
  
     default:
-      ureg_insn( ureg, 
-                 inst->op, 
-                 dst, num_dst, 
-                 src, num_src );
+      ureg_insn(ureg,
+                inst->op,
+                dst, num_dst,
+                src, num_src);
        break;
     }
  }
@@ -3580,9 +4441,9 @@ compile_tgsi_instruction(struct st_translate *t,
   * Basically, add (adjX, adjY) to the fragment position.
   */
  static void
-emit_adjusted_wpos( struct st_translate *t,
-                    const struct gl_program *program,
-                    GLfloat adjX, GLfloat adjY)
+emit_adjusted_wpos(struct st_translate *t,
+                   const struct gl_program *program,
+                   float adjX, float adjY)
  {
     struct ureg_program *ureg = t->ureg;
     struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
@@ -3604,9 +4465,9 @@ emit_adjusted_wpos( struct st_translate *t,
   * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
   */
  static void
-emit_wpos_inversion( struct st_translate *t,
-                     const struct gl_program *program,
-                     boolean invert)
+emit_wpos_inversion(struct st_translate *t,
+                    const struct gl_program *program,
+                    bool invert)
  {
     struct ureg_program *ureg = t->ureg;
  
@@ -3625,7 +4486,7 @@ emit_wpos_inversion( struct st_translate *t,
     unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
                                                         wposTransformState);
  
-   struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
+   struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst);
     struct ureg_dst wpos_temp;
     struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
  
@@ -3634,26 +4495,26 @@ emit_wpos_inversion( struct st_translate *t,
     if (wpos_input.File == TGSI_FILE_TEMPORARY)
        wpos_temp = ureg_dst(wpos_input);
     else {
-      wpos_temp = ureg_DECL_temporary( ureg );
-      ureg_MOV( ureg, wpos_temp, wpos_input );
+      wpos_temp = ureg_DECL_temporary(ureg);
+      ureg_MOV(ureg, wpos_temp, wpos_input);
     }
  
     if (invert) {
        /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
         */
-      ureg_MAD( ureg,
-                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
-                wpos_input,
-                ureg_scalar(wpostrans, 0),
-                ureg_scalar(wpostrans, 1));
+      ureg_MAD(ureg,
+               ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
+               wpos_input,
+               ureg_scalar(wpostrans, 0),
+               ureg_scalar(wpostrans, 1));
     } else {
        /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
         */
-      ureg_MAD( ureg,
-                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
-                wpos_input,
-                ureg_scalar(wpostrans, 2),
-                ureg_scalar(wpostrans, 3));
+      ureg_MAD(ureg,
+               ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
+               wpos_input,
+               ureg_scalar(wpostrans, 2),
+               ureg_scalar(wpostrans, 3));
     }
  
     /* Use wpos_temp as position input from here on:
@@ -3797,7 +4658,7 @@ st_translate_program(
     const GLuint outputMapping[],
     const ubyte outputSemanticName[],
     const ubyte outputSemanticIndex[],
-   boolean passthrough_edgeflags )
+   boolean passthrough_edgeflags)
  {
     struct st_translate translate, *t;
     unsigned i;
@@ -3843,27 +4704,24 @@ st_translate_program(
        for (i = 0; i < numOutputs; i++) {
           switch (outputSemanticName[i]) {
           case TGSI_SEMANTIC_POSITION:
-            t->outputs[i] = ureg_DECL_output( ureg,
-                                              TGSI_SEMANTIC_POSITION, /* Z / Depth */
-                                              outputSemanticIndex[i] );
-
-            t->outputs[i] = ureg_writemask( t->outputs[i],
-                                            TGSI_WRITEMASK_Z );
+            t->outputs[i] = ureg_DECL_output(ureg,
+                                             TGSI_SEMANTIC_POSITION, /* Z/Depth */
+                                             outputSemanticIndex[i]);
+            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
              break;
           case TGSI_SEMANTIC_STENCIL:
-            t->outputs[i] = ureg_DECL_output( ureg,
-                                              TGSI_SEMANTIC_STENCIL, /* Stencil */
-                                              outputSemanticIndex[i] );
-            t->outputs[i] = ureg_writemask( t->outputs[i],
-                                            TGSI_WRITEMASK_Y );
+            t->outputs[i] = ureg_DECL_output(ureg,
+                                             TGSI_SEMANTIC_STENCIL, /* Stencil */
+                                             outputSemanticIndex[i]);
+            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
              break;
           case TGSI_SEMANTIC_COLOR:
-            t->outputs[i] = ureg_DECL_output( ureg,
-                                              TGSI_SEMANTIC_COLOR,
-                                              outputSemanticIndex[i] );
+            t->outputs[i] = ureg_DECL_output(ureg,
+                                             TGSI_SEMANTIC_COLOR,
+                                             outputSemanticIndex[i]);
              break;
           default:
-            debug_assert(0);
+            assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
              return PIPE_ERROR_BAD_INPUT;
           }
        }
@@ -3877,9 +4735,9 @@ st_translate_program(
        }
  
        for (i = 0; i < numOutputs; i++) {
-         t->outputs[i] = ureg_DECL_output( ureg,
-                                           outputSemanticName[i],
-                                           outputSemanticIndex[i] );
+         t->outputs[i] = ureg_DECL_output(ureg,
+                                          outputSemanticName[i],
+                                          outputSemanticIndex[i]);
        }
     }
     else {
@@ -3890,9 +4748,9 @@ st_translate_program(
        }
  
        for (i = 0; i < numOutputs; i++) {
-         t->outputs[i] = ureg_DECL_output( ureg,
-                                           outputSemanticName[i],
-                                           outputSemanticIndex[i] );
+         t->outputs[i] = ureg_DECL_output(ureg,
+                                          outputSemanticName[i],
+                                          outputSemanticIndex[i]);
           if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) {
              /* Writing to the point size result register requires special
               * handling to implement clamping.
@@ -3906,8 +4764,8 @@ st_translate_program(
              unsigned pointSizeClampConst =
                 _mesa_add_state_reference(proginfo->Parameters,
                                           pointSizeClampState);
-            struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg );
-            t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst );
+            struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg);
+            t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst);
              t->pointSizeResult = t->outputs[i];
              t->pointSizeOutIndex = i;
              t->outputs[i] = psizregtemp;
@@ -3920,8 +4778,8 @@ st_translate_program(
     /* Declare address register.
      */
     if (program->num_address_regs > 0) {
-      debug_assert( program->num_address_regs == 1 );
-      t->address[0] = ureg_DECL_address( ureg );
+      assert(program->num_address_regs == 1);
+      t->address[0] = ureg_DECL_address(ureg);
     }
  
     /* Declare misc input registers
@@ -3946,16 +4804,15 @@ st_translate_program(
         */
        for (i = 0; i < (unsigned)program->next_temp; i++) {
           /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
-         t->temps[i] = ureg_DECL_temporary( t->ureg );
+         t->temps[i] = ureg_DECL_temporary(t->ureg);
        }
     }
  
-   /* Emit constants and immediates.  Mesa uses a single index space
-    * for these, so we put all the translated regs in t->constants.
-    * XXX: this entire if block depends on proginfo->Parameters from Mesa IR
+   /* Emit constants and uniforms.  TGSI uses a single index space for these, 
+    * so we put all the translated regs in t->constants.
      */
     if (proginfo->Parameters) {
-      t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] );
+      t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0]));
        if (t->constants == NULL) {
           ret = PIPE_ERROR_OUT_OF_MEMORY;
           goto out;
@@ -3968,65 +4825,55 @@ st_translate_program(
           case PROGRAM_STATE_VAR:
           case PROGRAM_NAMED_PARAM:
           case PROGRAM_UNIFORM:
-            t->constants[i] = ureg_DECL_constant( ureg, i );
+            t->constants[i] = ureg_DECL_constant(ureg, i);
              break;
  
-            /* Emit immediates only when there's no indirect addressing of
-             * the const buffer.
-             * FIXME: Be smarter and recognize param arrays:
-             * indirect addressing is only valid within the referenced
-             * array.
-             */
+         /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
+          * addressing of the const buffer.
+          * FIXME: Be smarter and recognize param arrays:
+          * indirect addressing is only valid within the referenced
+          * array.
+          */
           case PROGRAM_CONSTANT:
              if (program->indirect_addr_consts)
-               t->constants[i] = ureg_DECL_constant( ureg, i );
+               t->constants[i] = ureg_DECL_constant(ureg, i);
              else
-               switch(proginfo->Parameters->Parameters[i].DataType)
-               {
-               case GL_FLOAT:
-               case GL_FLOAT_VEC2:
-               case GL_FLOAT_VEC3:
-               case GL_FLOAT_VEC4:
-                  t->constants[i] = ureg_DECL_immediate(ureg, (float *)proginfo->Parameters->ParameterValues[i], 4);
-                  break;
-               case GL_INT:
-               case GL_INT_VEC2:
-               case GL_INT_VEC3:
-               case GL_INT_VEC4:
-                  t->constants[i] = ureg_DECL_immediate_int(ureg, (int *)proginfo->Parameters->ParameterValues[i], 4);
-                  break;
-               case GL_UNSIGNED_INT:
-               case GL_UNSIGNED_INT_VEC2:
-               case GL_UNSIGNED_INT_VEC3:
-               case GL_UNSIGNED_INT_VEC4:
-               case GL_BOOL:
-               case GL_BOOL_VEC2:
-               case GL_BOOL_VEC3:
-               case GL_BOOL_VEC4:
-                  t->constants[i] = ureg_DECL_immediate_uint(ureg, (unsigned *)proginfo->Parameters->ParameterValues[i], 4);
-                  break;
-               default:
-                  assert(!"should not get here");
-               }
+               t->constants[i] = emit_immediate(t,
+                                                proginfo->Parameters->ParameterValues[i],
+                                                proginfo->Parameters->Parameters[i].DataType,
+                                                4);
              break;
           default:
              break;
           }
        }
     }
+   
+   /* Emit immediate values.
+    */
+   t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src));
+   if (t->immediates == NULL) {
+      ret = PIPE_ERROR_OUT_OF_MEMORY;
+      goto out;
+   }
+   i = 0;
+   foreach_iter(exec_list_iterator, iter, program->immediates) {
+      immediate_storage *imm = (immediate_storage *)iter.get();
+      t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size);
+   }
  
     /* texture samplers */
     for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
        if (program->samplers_used & (1 << i)) {
-         t->samplers[i] = ureg_DECL_sampler( ureg, i );
+         t->samplers[i] = ureg_DECL_sampler(ureg, i);
        }
     }
  
     /* Emit each instruction in turn:
      */
     foreach_iter(exec_list_iterator, iter, program->instructions) {
-      set_insn_start( t, ureg_get_instruction_number( ureg ));
-      compile_tgsi_instruction( t, (glsl_to_tgsi_instruction *)iter.get() );
+      set_insn_start(t, ureg_get_instruction_number(ureg));
+      compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get());
  
        if (t->prevInstWrotePointSize && proginfo->Id) {
           /* The previous instruction wrote to the (fake) vertex point size
@@ -4036,14 +4883,14 @@ st_translate_program(
            * Note that we can't do this easily at the end of program due to
            * possible early return.
            */
-         set_insn_start( t, ureg_get_instruction_number( ureg ));
-         ureg_MAX( t->ureg,
-                   ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
-                   ureg_src(t->outputs[t->pointSizeOutIndex]),
-                   ureg_swizzle(t->pointSizeConst, 1,1,1,1));
-         ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
-                   ureg_src(t->outputs[t->pointSizeOutIndex]),
-                   ureg_swizzle(t->pointSizeConst, 2,2,2,2));
+         set_insn_start(t, ureg_get_instruction_number(ureg));
+         ureg_MAX(t->ureg,
+                  ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
+                  ureg_src(t->outputs[t->pointSizeOutIndex]),
+                  ureg_swizzle(t->pointSizeConst, 1,1,1,1));
+         ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
+                  ureg_src(t->outputs[t->pointSizeOutIndex]),
+                  ureg_swizzle(t->pointSizeConst, 2,2,2,2));
        }
        t->prevInstWrotePointSize = GL_FALSE;
     }
@@ -4051,15 +4898,15 @@ st_translate_program(
     /* Fix up all emitted labels:
      */
     for (i = 0; i < t->labels_count; i++) {
-      ureg_fixup_label( ureg,
-                        t->labels[i].token,
-                        t->insn[t->labels[i].branch_target] );
+      ureg_fixup_label(ureg, t->labels[i].token,
+                       t->insn[t->labels[i].branch_target]);
     }
  
  out:
     FREE(t->insn);
     FREE(t->labels);
     FREE(t->constants);
+   FREE(t->immediates);
  
     if (t->error) {
        debug_printf("%s: translate error flag set\n", __FUNCTION__);
@@ -4082,7 +4929,7 @@ get_mesa_program(struct gl_context *ctx,
     struct gl_program *prog;
     GLenum target;
     const char *target_string;
-   GLboolean progress;
+   bool progress;
     struct gl_shader_compiler_options *options =
           &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
  
@@ -4110,13 +4957,12 @@ get_mesa_program(struct gl_context *ctx,
     if (!prog)
        return NULL;
     prog->Parameters = _mesa_new_parameter_list();
-   prog->Varying = _mesa_new_parameter_list();
-   prog->Attributes = _mesa_new_parameter_list();
     v->ctx = ctx;
     v->prog = prog;
     v->shader_program = shader_program;
     v->options = options;
     v->glsl_version = ctx->Const.GLSLVersion;
+   v->native_integers = ctx->Const.NativeIntegers;
  
     add_uniforms_to_parameters_list(shader_program, shader, prog);
  
@@ -4170,17 +5016,20 @@ get_mesa_program(struct gl_context *ctx,
     v->remove_output_reads(PROGRAM_OUTPUT);
     if (target == GL_VERTEX_PROGRAM_ARB)
        v->remove_output_reads(PROGRAM_VARYING);
+   
+   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
+   v->simplify_cmp();
+   v->copy_propagate();
+   while (v->eliminate_dead_code_advanced());
  
-   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor.
-    * FIXME: These passes to optimize temporary registers don't work when there
+   /* FIXME: These passes to optimize temporary registers don't work when there
      * is indirect addressing of the temporary register space.  We need proper 
      * array support so that we don't have to give up these passes in every 
      * shader that uses arrays.
      */
     if (!v->indirect_addr_temps) {
-      v->copy_propagate();
-      v->merge_registers();
        v->eliminate_dead_code();
+      v->merge_registers();
        v->renumber_registers();
     }
     
@@ -4286,19 +5135,19 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
           /* Lowering */
           do_mat_op_to_vec(ir);
           lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
-                                | LOG_TO_LOG2
+                                | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
                                  | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
  
           progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
  
           progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
  
-         progress = lower_quadop_vector(ir, true) || progress;
+         progress = lower_quadop_vector(ir, false) || progress;
  
-         if (options->EmitNoIfs) {
+         if (options->MaxIfDepth == 0)
              progress = lower_discard(ir) || progress;
-            progress = lower_if_to_cond_assign(ir) || progress;
-         }
+
+         progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
  
           if (options->EmitNoNoise)
              progress = lower_noise(ir) || progress;
@@ -4331,29 +5180,18 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
        linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
  
        if (linked_prog) {
-         bool ok = true;
-
-         switch (prog->_LinkedShaders[i]->Type) {
-         case GL_VERTEX_SHADER:
-            _mesa_reference_vertprog(ctx, &prog->VertexProgram,
-                                     (struct gl_vertex_program *)linked_prog);
-            ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
-                                                 linked_prog);
-            break;
-         case GL_FRAGMENT_SHADER:
-            _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
-                                     (struct gl_fragment_program *)linked_prog);
-            ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
-                                                 linked_prog);
-            break;
-         case GL_GEOMETRY_SHADER:
-            _mesa_reference_geomprog(ctx, &prog->GeometryProgram,
-                                     (struct gl_geometry_program *)linked_prog);
-            ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV,
-                                                 linked_prog);
-            break;
-         }
-         if (!ok) {
+        static const GLenum targets[] = {
+           GL_VERTEX_PROGRAM_ARB,
+           GL_FRAGMENT_PROGRAM_ARB,
+           GL_GEOMETRY_PROGRAM_NV
+        };
+
+        _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
+                                linked_prog);
+         if (!ctx->Driver.ProgramStringNotify(ctx, targets[i], linked_prog)) {
+           _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
+                                   NULL);
+            _mesa_reference_program(ctx, &linked_prog, NULL);
              return GL_FALSE;
           }
        }
@@ -4364,53 +5202,4 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
     return GL_TRUE;
  }
  
-
-/**
- * Link a GLSL shader program.  Called via glLinkProgram().
- */
-void
-st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
-{
-   unsigned int i;
-
-   _mesa_clear_shader_program_data(ctx, prog);
-
-   prog->LinkStatus = GL_TRUE;
-
-   for (i = 0; i < prog->NumShaders; i++) {
-      if (!prog->Shaders[i]->CompileStatus) {
-         fail_link(prog, "linking with uncompiled shader");
-         prog->LinkStatus = GL_FALSE;
-      }
-   }
-
-   prog->Varying = _mesa_new_parameter_list();
-   _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
-   _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
-   _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL);
-
-   if (prog->LinkStatus) {
-      link_shaders(ctx, prog);
-   }
-
-   if (prog->LinkStatus) {
-      if (!ctx->Driver.LinkShader(ctx, prog)) {
-         prog->LinkStatus = GL_FALSE;
-      }
-   }
-
-   set_uniform_initializers(ctx, prog);
-
-   if (ctx->Shader.Flags & GLSL_DUMP) {
-      if (!prog->LinkStatus) {
-         printf("GLSL shader program %d failed to link\n", prog->Name);
-      }
-
-      if (prog->InfoLog && prog->InfoLog[0] != 0) {
-         printf("GLSL shader program %d info log:\n", prog->Name);
-         printf("%s\n", prog->InfoLog);
-      }
-   }
-}
-
  } /* extern "C" */