st/nir: Fix st_nir_opts() prototype.

[mesa.git] / src / mesa / state_tracker / st_glsl_to_tgsi.cpp
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp

index 93b5cc7c0b24608c94f44f0010a0deced0650039..00799b4a872e266f0733a488bb28a8140e2ab762 100644 (file)
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -98,7 +98,7 @@ public:
  
  class immediate_storage : public exec_node {
  public:
-   immediate_storage(gl_constant_value *values, int size32, int type)
+   immediate_storage(gl_constant_value *values, int size32, GLenum type)
     {
        memcpy(this->values, values, size32 * sizeof(gl_constant_value));
        this->size32 = size32;
@@ -108,7 +108,7 @@ public:
     /* doubles are stored across 2 gl_constant_values */
     gl_constant_value values[4];
     int size32; /**< Number of 32-bit components (1-4) */
-   int type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
+   GLenum type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
  };
  
  static const st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
@@ -123,6 +123,7 @@ struct inout_decl {
     enum glsl_interp_mode interp;
     enum glsl_base_type base_type;
     ubyte usage_mask; /* GLSL-style usage-mask,  i.e. single bit per double */
+   bool invariant;
  };
  
  static struct inout_decl *
@@ -189,10 +190,10 @@ public:
     int num_address_regs;
     uint32_t samplers_used;
     glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
-   int sampler_targets[PIPE_MAX_SAMPLERS];   /**< One of TGSI_TEXTURE_* */
+   enum tgsi_texture_type sampler_targets[PIPE_MAX_SAMPLERS];
     int images_used;
-   int image_targets[PIPE_MAX_SHADER_IMAGES];
-   unsigned image_formats[PIPE_MAX_SHADER_IMAGES];
+   enum tgsi_texture_type image_targets[PIPE_MAX_SHADER_IMAGES];
+   enum pipe_format image_formats[PIPE_MAX_SHADER_IMAGES];
     bool indirect_addr_consts;
     int wpos_transform_const;
  
@@ -207,7 +208,7 @@ public:
     variable_storage *find_variable_storage(ir_variable *var);
  
     int add_constant(gl_register_file file, gl_constant_value values[8],
-                    int size, int datatype, uint16_t *swizzle_out);
+                    int size, GLenum datatype, uint16_t *swizzle_out);
  
     st_src_reg get_temp(const glsl_type *type);
     void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
@@ -255,7 +256,7 @@ public:
     void visit_membar_intrinsic(ir_call *);
     void visit_shared_intrinsic(ir_call *);
     void visit_image_intrinsic(ir_call *);
-   void visit_generic_intrinsic(ir_call *, unsigned op);
+   void visit_generic_intrinsic(ir_call *, enum tgsi_opcode op);
  
     st_src_reg result;
  
@@ -269,23 +270,23 @@ public:
     /** List of glsl_to_tgsi_instruction */
     exec_list instructions;
  
-   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
+   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, enum tgsi_opcode op,
                                        st_dst_reg dst = undef_dst,
                                        st_src_reg src0 = undef_src,
                                        st_src_reg src1 = undef_src,
                                        st_src_reg src2 = undef_src,
                                        st_src_reg src3 = undef_src);
  
-   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
+   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, enum tgsi_opcode op,
                                        st_dst_reg dst, st_dst_reg dst1,
                                        st_src_reg src0 = undef_src,
                                        st_src_reg src1 = undef_src,
                                        st_src_reg src2 = undef_src,
                                        st_src_reg src3 = undef_src);
  
-   unsigned get_opcode(unsigned op,
-                    st_dst_reg dst,
-                    st_src_reg src0, st_src_reg src1);
+   enum tgsi_opcode get_opcode(enum tgsi_opcode op,
+                               st_dst_reg dst,
+                               st_src_reg src0, st_src_reg src1);
  
     /**
      * Emit the correct dot-product instruction for the type of arguments
@@ -296,10 +297,10 @@ public:
                                       st_src_reg src1,
                                       unsigned elements);
  
-   void emit_scalar(ir_instruction *ir, unsigned op,
+   void emit_scalar(ir_instruction *ir, enum tgsi_opcode op,
                      st_dst_reg dst, st_src_reg src0);
  
-   void emit_scalar(ir_instruction *ir, unsigned op,
+   void emit_scalar(ir_instruction *ir, enum tgsi_opcode op,
                      st_dst_reg dst, st_src_reg src0, st_src_reg src1);
  
     void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
@@ -348,12 +349,16 @@ public:
     void *mem_ctx;
  };
  
-static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 0);
-static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 1);
-static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 2);
+static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X,
+                                           GLSL_TYPE_FLOAT, 0);
+static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X,
+                                            GLSL_TYPE_FLOAT, 1);
+static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X,
+                                               GLSL_TYPE_FLOAT, 2);
  
  static void
-fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
+fail_link(struct gl_shader_program *prog, const char *fmt, ...)
+   PRINTFLIKE(2, 3);
  
  static void
  fail_link(struct gl_shader_program *prog, const char *fmt, ...)
@@ -363,7 +368,7 @@ fail_link(struct gl_shader_program *prog, const char *fmt, ...)
     ralloc_vasprintf_append(&prog->data->InfoLog, fmt, args);
     va_end(args);
  
-   prog->data->LinkStatus = linking_failure;
+   prog->data->LinkStatus = LINKING_FAILURE;
  }
  
  int
@@ -382,7 +387,7 @@ swizzle_for_size(int size)
  
  
  glsl_to_tgsi_instruction *
-glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
+glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, enum tgsi_opcode op,
                                 st_dst_reg dst, st_dst_reg dst1,
                                 st_src_reg src0, st_src_reg src1,
                                 st_src_reg src2, st_src_reg src3)
@@ -445,7 +450,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
  
     /* Update indirect addressing status used by TGSI */
     if (dst.reladdr || dst.reladdr2) {
-      switch(dst.file) {
+      switch (dst.file) {
        case PROGRAM_STATE_VAR:
        case PROGRAM_CONSTANT:
        case PROGRAM_UNIFORM:
@@ -460,8 +465,8 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
     }
     else {
        for (i = 0; i < 4; i++) {
-         if(inst->src[i].reladdr) {
-            switch(inst->src[i].file) {
+         if (inst->src[i].reladdr) {
+            switch (inst->src[i].file) {
              case PROGRAM_STATE_VAR:
              case PROGRAM_CONSTANT:
              case PROGRAM_UNIFORM:
@@ -497,8 +502,11 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
      */
     for (j = 0; j < 2; j++) {
        dst_is_64bit[j] = glsl_base_type_is_64bit(inst->dst[j].type);
-      if (!dst_is_64bit[j] && inst->dst[j].file == PROGRAM_OUTPUT && inst->dst[j].type == GLSL_TYPE_ARRAY) {
-         enum glsl_base_type type = find_array_type(this->outputs, this->num_outputs, inst->dst[j].array_id);
+      if (!dst_is_64bit[j] && inst->dst[j].file == PROGRAM_OUTPUT &&
+          inst->dst[j].type == GLSL_TYPE_ARRAY) {
+         enum glsl_base_type type = find_array_type(this->outputs,
+                                                    this->num_outputs,
+                                                    inst->dst[j].array_id);
           if (glsl_base_type_is_64bit(type))
              dst_is_64bit[j] = true;
        }
@@ -510,7 +518,8 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
        int initial_src_swz[4], initial_src_idx[4];
        int initial_dst_idx[2], initial_dst_writemask[2];
        /* select the writemask for dst0 or dst1 */
-      unsigned writemask = inst->dst[1].file == PROGRAM_UNDEFINED ? inst->dst[0].writemask : inst->dst[1].writemask;
+      unsigned writemask = inst->dst[1].file == PROGRAM_UNDEFINED
+         ? inst->dst[0].writemask : inst->dst[1].writemask;
  
        /* copy out the writemask, index and swizzles for all src/dsts. */
        for (j = 0; j < 2; j++) {
@@ -532,9 +541,10 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
  
           int i = u_bit_scan(&writemask);
  
-         /* before emitting the instruction, see if we have to adjust load / store
-          * address */
-         if (i > 1 && (inst->op == TGSI_OPCODE_LOAD || inst->op == TGSI_OPCODE_STORE) &&
+         /* before emitting the instruction, see if we have to adjust
+          * load / store address */
+         if (i > 1 && (inst->op == TGSI_OPCODE_LOAD ||
+                       inst->op == TGSI_OPCODE_STORE) &&
               addr.file == PROGRAM_UNDEFINED) {
              /* We have to advance the buffer address by 16 */
              addr = get_temp(glsl_type::uint_type);
@@ -561,14 +571,16 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
                 dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY;
                 dinst->dst[j].index = initial_dst_idx[j];
                 if (i > 1) {
-                  if (dinst->op == TGSI_OPCODE_LOAD || dinst->op == TGSI_OPCODE_STORE)
+                  if (dinst->op == TGSI_OPCODE_LOAD ||
+                      dinst->op == TGSI_OPCODE_STORE)
                       dinst->src[0] = addr;
                    if (dinst->op != TGSI_OPCODE_STORE)
                       dinst->dst[j].index++;
                 }
              } else {
-               /* if we aren't writing to a double, just get the bit of the initial writemask
-                  for this channel */
+               /* if we aren't writing to a double, just get the bit of the
+                * initial writemask for this channel
+                */
                 dinst->dst[j].writemask = initial_dst_writemask[j] & (1 << i);
              }
           }
@@ -585,14 +597,19 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
                 }
  
                 if (swz & 1)
-                  dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
+                  dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W,
+                                                        SWIZZLE_Z, SWIZZLE_W);
                 else
-                  dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
+                  dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
+                                                        SWIZZLE_X, SWIZZLE_Y);
  
              } else {
                 /* some opcodes are special case in what they use as sources
-                  - [FUI]2D/[UI]2I64 is a float/[u]int src0, (D)LDEXP is integer src1 */
-               if (op == TGSI_OPCODE_F2D || op == TGSI_OPCODE_U2D || op == TGSI_OPCODE_I2D ||
+                * - [FUI]2D/[UI]2I64 is a float/[u]int src0, (D)LDEXP is
+                * integer src1
+                */
+               if (op == TGSI_OPCODE_F2D || op == TGSI_OPCODE_U2D ||
+                   op == TGSI_OPCODE_I2D ||
                     op == TGSI_OPCODE_I2I64 || op == TGSI_OPCODE_U2I64 ||
                     op == TGSI_OPCODE_DLDEXP || op == TGSI_OPCODE_LDEXP ||
                     (op == TGSI_OPCODE_UCMP && dst_is_64bit[0])) {
@@ -611,7 +628,7 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
  }
  
  glsl_to_tgsi_instruction *
-glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
+glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, enum tgsi_opcode op,
                                 st_dst_reg dst,
                                 st_src_reg src0, st_src_reg src1,
                                 st_src_reg src2, st_src_reg src3)
@@ -623,8 +640,8 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
   * Determines whether to use an integer, unsigned integer, or float opcode
   * based on the operands and input opcode, then emits the result.
   */
-unsigned
-glsl_to_tgsi_visitor::get_opcode(unsigned op,
+enum tgsi_opcode
+glsl_to_tgsi_visitor::get_opcode(enum tgsi_opcode op,
                                   st_dst_reg dst,
                                   st_src_reg src0, st_src_reg src1)
  {
@@ -685,7 +702,7 @@ glsl_to_tgsi_visitor::get_opcode(unsigned op,
           op = TGSI_OPCODE_##c; \
        break;
  
-   switch(op) {
+   switch (op) {
        /* Some instructions are initially selected without considering the type.
         * This fixes the type:
         *
@@ -722,7 +739,8 @@ glsl_to_tgsi_visitor::get_opcode(unsigned op,
        casecomp(SGE, FSGE, ISGE, USGE, DSGE, I64SGE, U64SGE);
        casecomp(SLT, FSLT, ISLT, USLT, DSLT, I64SLT, U64SLT);
  
-      default: break;
+      default:
+         break;
     }
  
     assert(op != TGSI_OPCODE_LAST);
@@ -734,7 +752,7 @@ glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
                                st_dst_reg dst, st_src_reg src0, st_src_reg src1,
                                unsigned elements)
  {
-   static const unsigned dot_opcodes[] = {
+   static const enum tgsi_opcode dot_opcodes[] = {
        TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
     };
  
@@ -750,7 +768,7 @@ glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
   * to produce dest channels.
   */
  void
-glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
+glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum tgsi_opcode op,
                                    st_dst_reg dst,
                                    st_src_reg orig_src0, st_src_reg orig_src1)
  {
@@ -794,7 +812,7 @@ glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
  }
  
  void
-glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
+glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum tgsi_opcode op,
                                    st_dst_reg dst, st_src_reg src0)
  {
     st_src_reg undef = undef_src;
@@ -808,7 +826,7 @@ void
  glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
                                 st_dst_reg dst, st_src_reg src0)
  {
-   int op = TGSI_OPCODE_ARL;
+   enum tgsi_opcode op = TGSI_OPCODE_ARL;
  
     if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT) {
        if (!this->need_uarl && src0.is_legal_tgsi_address_operand())
@@ -826,13 +844,15 @@ glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
  
  int
  glsl_to_tgsi_visitor::add_constant(gl_register_file file,
-                                   gl_constant_value values[8], int size, int datatype,
+                                   gl_constant_value values[8], int size,
+                                   GLenum datatype,
                                     uint16_t *swizzle_out)
  {
     if (file == PROGRAM_CONSTANT) {
        GLuint swizzle = swizzle_out ? *swizzle_out : 0;
-      int result = _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
-                                                    size, datatype, &swizzle);
+      int result = _mesa_add_typed_unnamed_constant(this->prog->Parameters,
+                                                    values, size, datatype,
+                                                    &swizzle);
        if (swizzle_out)
           *swizzle_out = swizzle;
        return result;
@@ -844,7 +864,7 @@ glsl_to_tgsi_visitor::add_constant(gl_register_file file,
     immediate_storage *entry;
     int size32 = size * ((datatype == GL_DOUBLE ||
                           datatype == GL_INT64_ARB ||
-                         datatype == GL_UNSIGNED_INT64_ARB)? 2 : 1);
+                         datatype == GL_UNSIGNED_INT64_ARB) ? 2 : 1);
     int i;
  
     /* Search immediate storage to see if we already have an identical
@@ -875,7 +895,8 @@ glsl_to_tgsi_visitor::add_constant(gl_register_file file,
     for (i = 0; i * 4 < size32; i++) {
        int slot_size = MIN2(size32 - (i * 4), 4);
        /* Add this immediate to the list. */
-      entry = new(mem_ctx) immediate_storage(&values[i * 4], slot_size, datatype);
+      entry = new(mem_ctx) immediate_storage(&values[i * 4],
+                                             slot_size, datatype);
        this->immediates.push_tail(entry);
        this->num_immediates++;
     }
@@ -963,7 +984,7 @@ add_buffer_to_load_and_stores(glsl_to_tgsi_instruction *inst, st_src_reg *buf,
      * emit_asm() might have actually split the op into pieces, e.g. for
      * double stores. We have to go back and fix up all the generated ops.
      */
-   unsigned op = inst->op;
+   enum tgsi_opcode op = inst->op;
     do {
        inst->resource = *buf;
        if (access)
@@ -1115,7 +1136,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir)
  
        for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
           int index = _mesa_add_state_reference(this->prog->Parameters,
-                                               (gl_state_index *)slots[i].tokens);
+                                               slots[i].tokens);
  
           if (storage->file == PROGRAM_STATE_VAR) {
              if (storage->index == -1) {
@@ -1242,7 +1263,8 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
   * instruction.
   */
  bool
-glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
+glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir,
+                                               int try_operand)
  {
     const int other_operand = 1 - try_operand;
     st_src_reg a, b;
@@ -1271,8 +1293,10 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
     if (!reg->reladdr && !reg->reladdr2)
        return;
  
-   if (reg->reladdr) emit_arl(ir, address_reg, *reg->reladdr);
-   if (reg->reladdr2) emit_arl(ir, address_reg2, *reg->reladdr2);
+   if (reg->reladdr)
+      emit_arl(ir, address_reg, *reg->reladdr);
+   if (reg->reladdr2)
+      emit_arl(ir, address_reg2, *reg->reladdr2);
  
     if (*num_reladdr != 1) {
        st_src_reg temp = get_temp(glsl_type::get_instance(reg->type, 4, 1));
@@ -1341,10 +1365,33 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
     st_dst_reg result_dst;
  
     int vector_elements = ir->operands[0]->type->vector_elements;
-   if (ir->operands[1]) {
+   if (ir->operands[1] &&
+       ir->operation != ir_binop_interpolate_at_offset &&
+       ir->operation != ir_binop_interpolate_at_sample) {
+      st_src_reg *swz_op = NULL;
+      if (vector_elements > ir->operands[1]->type->vector_elements) {
+         assert(ir->operands[1]->type->vector_elements == 1);
+         swz_op = &op[1];
+      } else if (vector_elements < ir->operands[1]->type->vector_elements) {
+         assert(ir->operands[0]->type->vector_elements == 1);
+         swz_op = &op[0];
+      }
+      if (swz_op) {
+         uint16_t swizzle_x = GET_SWZ(swz_op->swizzle, 0);
+         swz_op->swizzle = MAKE_SWIZZLE4(swizzle_x, swizzle_x,
+                                         swizzle_x, swizzle_x);
+      }
        vector_elements = MAX2(vector_elements,
                               ir->operands[1]->type->vector_elements);
     }
+   if (ir->operands[2] &&
+       ir->operands[2]->type->vector_elements != vector_elements) {
+      /* This can happen with ir_triop_lrp, i.e. glsl mix */
+      assert(ir->operands[2]->type->vector_elements == 1);
+      uint16_t swizzle_x = GET_SWZ(op[2].swizzle, 0);
+      op[2].swizzle = MAKE_SWIZZLE4(swizzle_x, swizzle_x,
+                                    swizzle_x, swizzle_x);
+   }
  
     this->result.file = PROGRAM_UNDEFINED;
  
@@ -1371,13 +1418,16 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
            * 0.0 and 1.0, 1-x also implements !x.
            */
           op[0].negate = ~op[0].negate;
-         emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
+         emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0],
+                  st_src_reg_for_float(1.0));
        }
        break;
     case ir_unop_neg:
-      if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64)
+      if (result_dst.type == GLSL_TYPE_INT64 ||
+          result_dst.type == GLSL_TYPE_UINT64)
           emit_asm(ir, TGSI_OPCODE_I64NEG, result_dst, op[0]);
-      else if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
+      else if (result_dst.type == GLSL_TYPE_INT ||
+               result_dst.type == GLSL_TYPE_UINT)
           emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
        else if (result_dst.type == GLSL_TYPE_DOUBLE)
           emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]);
@@ -1394,7 +1444,8 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
           emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0].get_abs());
        else if (result_dst.type == GLSL_TYPE_DOUBLE)
           emit_asm(ir, TGSI_OPCODE_DABS, result_dst, op[0]);
-      else if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64)
+      else if (result_dst.type == GLSL_TYPE_INT64 ||
+               result_dst.type == GLSL_TYPE_UINT64)
           emit_asm(ir, TGSI_OPCODE_I64ABS, result_dst, op[0]);
        else
           emit_asm(ir, TGSI_OPCODE_IABS, result_dst, op[0]);
@@ -1446,7 +1497,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
         * is a FBO or the window system buffer, respectively.
         * It is then multiplied with the source operand of DDY.
         */
-      static const gl_state_index transform_y_state[STATE_LENGTH]
+      static const gl_state_index16 transform_y_state[STATE_LENGTH]
           = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM };
  
        unsigned transform_y_index =
@@ -1576,7 +1627,8 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
               */
              st_src_reg sge_src = result_src;
              sge_src.negate = ~sge_src.negate;
-            emit_asm(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
+            emit_asm(ir, TGSI_OPCODE_SGE, result_dst, sge_src,
+                     st_src_reg_for_float(0.0));
           }
        } else {
           emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
@@ -1637,13 +1689,14 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
                  */
                 dp->saturate = true;
              } else {
-               /* Negating the result of the dot-product gives values on the range
-                * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
-                * achieved using SLT.
+               /* Negating the result of the dot-product gives values on the
+                * range [-4, 0].  Zero stays zero, and negative values become
+                * 1.0.  This achieved using SLT.
                  */
                 st_src_reg slt_src = result_src;
                 slt_src.negate = ~slt_src.negate;
-               emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+               emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src,
+                        st_src_reg_for_float(0.0));
              }
           }
        } else {
@@ -1673,17 +1726,19 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
              emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
           if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
              /* The clamping to [0,1] can be done for free in the fragment
-             * shader with a saturate if floats are being used as boolean values.
+             * shader with a saturate if floats are being used as boolean
+             * values.
               */
              add->saturate = true;
           } else {
              /* Negating the result of the addition gives values on the range
-             * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
-             * is achieved using SLT.
+             * [-2, 0].  Zero stays zero, and negative values become 1.0
+             * This is achieved using SLT.
               */
              st_src_reg slt_src = result_src;
              slt_src.negate = ~slt_src.negate;
-            emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
+            emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src,
+                     st_src_reg_for_float(0.0));
           }
        }
        break;
@@ -1732,7 +1787,8 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
        /* fallthrough to next case otherwise */
     case ir_unop_b2f:
        if (native_integers) {
-         emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
+         emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0],
+                  st_src_reg_for_float(1.0));
           break;
        }
        /* fallthrough to next case otherwise */
@@ -1750,7 +1806,8 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
            * GLSL requires that int(bool) return 1 for true and 0 for false.
            * This conversion is done with AND, but it could be done with NEG.
            */
-         emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
+         emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0],
+                  st_src_reg_for_int(1));
        } else {
           /* Booleans and integers are both stored as floats when native
            * integers are disabled.
@@ -1786,16 +1843,20 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
        result_src.type = GLSL_TYPE_FLOAT;
        break;
     case ir_unop_f2b:
-      emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
+      emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0],
+               st_src_reg_for_float(0.0));
        break;
     case ir_unop_d2b:
-      emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_double(0.0));
+      emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0],
+               st_src_reg_for_double(0.0));
        break;
     case ir_unop_i2b:
        if (native_integers)
-         emit_asm(ir, TGSI_OPCODE_USNE, result_dst, op[0], st_src_reg_for_int(0));
+         emit_asm(ir, TGSI_OPCODE_USNE, result_dst, op[0],
+                  st_src_reg_for_int(0));
        else
-         emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
+         emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0],
+                  st_src_reg_for_float(0.0));
        break;
     case ir_unop_bitcast_u642d:
     case ir_unop_bitcast_i642d:
@@ -1849,15 +1910,16 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
     case ir_binop_lshift:
     case ir_binop_rshift:
        if (native_integers) {
-         unsigned opcode = ir->operation == ir_binop_lshift ? TGSI_OPCODE_SHL
-                                                            : TGSI_OPCODE_ISHR;
+         enum tgsi_opcode opcode = ir->operation == ir_binop_lshift
+            ? TGSI_OPCODE_SHL : TGSI_OPCODE_ISHR;
           st_src_reg count;
  
           if (glsl_base_type_is_64bit(op[0].type)) {
              /* GLSL shift operations have 32-bit shift counts, but TGSI uses
               * 64 bits.
               */
-            count = get_temp(glsl_type::u64vec(ir->operands[1]->type->components()));
+            count = get_temp(glsl_type::u64vec(ir->operands[1]
+                                               ->type->components()));
              emit_asm(ir, TGSI_OPCODE_U2I64, st_dst_reg(count), op[1]);
           } else {
              count = op[1];
@@ -2053,7 +2115,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
        break;
     case ir_binop_interpolate_at_offset: {
        /* The y coordinate needs to be flipped for the default fb */
-      static const gl_state_index transform_y_state[STATE_LENGTH]
+      static const gl_state_index16 transform_y_state[STATE_LENGTH]
           = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM };
  
        unsigned transform_y_index =
@@ -2128,10 +2190,11 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
  
     case ir_unop_get_buffer_size: {
        ir_constant *const_offset = ir->operands[0]->as_constant();
+      int buf_base = ctx->st->has_hw_atomics
+         ? 0 : ctx->Const.Program[shader->Stage].MaxAtomicBuffers;
        st_src_reg buffer(
              PROGRAM_BUFFER,
-            ctx->Const.Program[shader->Stage].MaxAtomicBuffers +
-            (const_offset ? const_offset->value.u[0] : 0),
+            buf_base + (const_offset ? const_offset->value.u[0] : 0),
              GLSL_TYPE_UINT);
        if (!const_offset) {
           buffer.reladdr = ralloc(mem_ctx, st_src_reg);
@@ -2183,12 +2246,15 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
           temp_dst.writemask = WRITEMASK_X;
           if (vector_elements > 3)
              temp_dst.writemask |= WRITEMASK_Z;
-         op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 2), GET_SWZ(orig_swz, 2),
-                                       GET_SWZ(orig_swz, 3), GET_SWZ(orig_swz, 3));
+         op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 2),
+                                       GET_SWZ(orig_swz, 2),
+                                       GET_SWZ(orig_swz, 3),
+                                       GET_SWZ(orig_swz, 3));
           if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64)
              emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]);
           else
-            emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], st_src_reg_for_int(1));
+            emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0],
+                     st_src_reg_for_int(1));
        }
        break;
     }
@@ -2206,9 +2272,11 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
        for (el = 0; el < vector_elements; el++) {
           unsigned swz = GET_SWZ(orig_swz, el);
           if (swz & 1)
-            op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z);
+            op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z,
+                                          SWIZZLE_Z, SWIZZLE_Z);
           else
-            op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
+            op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X,
+                                          SWIZZLE_X, SWIZZLE_X);
           if (swz > 2)
              op[0].index = orig_idx + 1;
           op[0].type = GLSL_TYPE_UINT;
@@ -2223,7 +2291,8 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
        break;
     }
     case ir_unop_i642b:
-      emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0], st_src_reg_for_int64(0));
+      emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0],
+               st_src_reg_for_int64(0));
        break;
     case ir_unop_i642f:
        emit_asm(ir, TGSI_OPCODE_I642F, result_dst, op[0]);
@@ -2440,6 +2509,8 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
           unsigned num_components;
           num_outputs++;
  
+         decl->invariant = var->data.invariant;
+
           if (type_without_array->is_64bit())
              component = component / 2;
           if (type_without_array->vector_elements)
@@ -2875,7 +2946,8 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *
     if (type->is_matrix()) {
        const struct glsl_type *vec_type;
  
-      vec_type = glsl_type::get_instance(type->is_double() ? GLSL_TYPE_DOUBLE : GLSL_TYPE_FLOAT,
+      vec_type = glsl_type::get_instance(type->is_double()
+                                         ? GLSL_TYPE_DOUBLE : GLSL_TYPE_FLOAT,
                                           type->vector_elements, 1);
  
        for (int i = 0; i < type->matrix_columns; i++) {
@@ -2893,7 +2965,8 @@ glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *
  
        if (l_src.file == PROGRAM_OUTPUT &&
            this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
-          (l_src.index == FRAG_RESULT_DEPTH || l_src.index == FRAG_RESULT_STENCIL)) {
+          (l_src.index == FRAG_RESULT_DEPTH ||
+           l_src.index == FRAG_RESULT_STENCIL)) {
           /* This is a special case because the source swizzles will be shifted
            * later to account for the difference between GLSL (where they're
            * plain floats) and TGSI (where they're Z and Y components). */
@@ -2957,7 +3030,8 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
        } else if (ir->write_mask == 0) {
           assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
  
-         unsigned num_elements = ir->lhs->type->without_array()->vector_elements;
+         unsigned num_elements =
+            ir->lhs->type->without_array()->vector_elements;
  
           if (num_elements) {
              l.writemask = u_bit_consecutive(0, num_elements);
@@ -3096,7 +3170,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
        for (i = 0; i < ir->type->matrix_columns; i++) {
           switch (ir->type->base_type) {
           case GLSL_TYPE_FLOAT:
-            values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
+            values = (gl_constant_value *)
+               &ir->value.f[i * ir->type->vector_elements];
  
              src = st_src_reg(file, -1, ir->type->base_type);
              src.index = add_constant(file,
@@ -3107,7 +3182,8 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
              emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
              break;
           case GLSL_TYPE_DOUBLE:
-            values = (gl_constant_value *) &ir->value.d[i * ir->type->vector_elements];
+            values = (gl_constant_value *)
+               &ir->value.d[i * ir->type->vector_elements];
              src = st_src_reg(file, -1, ir->type->base_type);
              src.index = add_constant(file,
                                       values,
@@ -3116,22 +3192,26 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir)
                                       &src.swizzle);
              if (ir->type->vector_elements >= 2) {
                 mat_column.writemask = WRITEMASK_XY;
-               src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
+               src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
+                                           SWIZZLE_X, SWIZZLE_Y);
                 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
              } else {
                 mat_column.writemask = WRITEMASK_X;
-               src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
+               src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X,
+                                           SWIZZLE_X, SWIZZLE_X);
                 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
              }
              src.index++;
              if (ir->type->vector_elements > 2) {
                 if (ir->type->vector_elements == 4) {
                    mat_column.writemask = WRITEMASK_ZW;
-                  src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
+                  src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
+                                              SWIZZLE_X, SWIZZLE_Y);
                    emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
                 } else {
                    mat_column.writemask = WRITEMASK_Z;
-                  src.swizzle = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y);
+                  src.swizzle = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Y,
+                                              SWIZZLE_Y, SWIZZLE_Y);
                    emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
                    mat_column.writemask = WRITEMASK_XYZW;
                    src.swizzle = SWIZZLE_XYZW;
@@ -3227,7 +3307,8 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
  
     if (has_hw_atomics) {
        variable_storage *entry = find_variable_storage(location);
-      st_src_reg buffer(PROGRAM_HW_ATOMIC, 0, GLSL_TYPE_ATOMIC_UINT, location->data.binding);
+      st_src_reg buffer(PROGRAM_HW_ATOMIC, 0, GLSL_TYPE_ATOMIC_UINT,
+                        location->data.binding);
  
        if (!entry) {
           entry = new(mem_ctx) variable_storage(location, PROGRAM_HW_ATOMIC,
@@ -3299,7 +3380,7 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
        val->accept(this);
  
        st_src_reg data = this->result, data2 = undef_src;
-      unsigned opcode;
+      enum tgsi_opcode opcode;
        switch (ir->callee->intrinsic_id) {
        case ir_intrinsic_atomic_counter_add:
           opcode = TGSI_OPCODE_ATOMUADD;
@@ -3352,11 +3433,11 @@ glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir)
     ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
  
     ir_constant *const_block = block->as_constant();
-
+   int buf_base = st_context(ctx)->has_hw_atomics
+      ? 0 : ctx->Const.Program[shader->Stage].MaxAtomicBuffers;
     st_src_reg buffer(
           PROGRAM_BUFFER,
-         ctx->Const.Program[shader->Stage].MaxAtomicBuffers +
-         (const_block ? const_block->value.u[0] : 0),
+         buf_base + (const_block ? const_block->value.u[0] : 0),
           GLSL_TYPE_UINT);
  
     if (!const_block) {
@@ -3382,7 +3463,8 @@ glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir)
     if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_load) {
        inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off);
        if (dst.type == GLSL_TYPE_BOOL)
-         emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst), st_src_reg_for_int(0));
+         emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst),
+                  st_src_reg_for_int(0));
     } else if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_store) {
        param = param->get_next();
        ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
@@ -3401,7 +3483,7 @@ glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir)
        val->accept(this);
  
        st_src_reg data = this->result, data2 = undef_src;
-      unsigned opcode;
+      enum tgsi_opcode opcode;
        switch (ir->callee->intrinsic_id) {
        case ir_intrinsic_ssbo_atomic_add:
           opcode = TGSI_OPCODE_ATOMUADD;
@@ -3533,7 +3615,7 @@ glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir)
        val->accept(this);
  
        st_src_reg data = this->result, data2 = undef_src;
-      unsigned opcode;
+      enum tgsi_opcode opcode;
        switch (ir->callee->intrinsic_id) {
        case ir_intrinsic_shared_atomic_add:
           opcode = TGSI_OPCODE_ATOMUADD;
@@ -3718,7 +3800,7 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir)
  
        assert(param->is_tail_sentinel());
  
-      unsigned opcode;
+      enum tgsi_opcode opcode;
        switch (ir->callee->intrinsic_id) {
        case ir_intrinsic_image_load:
           opcode = TGSI_OPCODE_LOAD;
@@ -3783,7 +3865,7 @@ glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir)
  }
  
  void
-glsl_to_tgsi_visitor::visit_generic_intrinsic(ir_call *ir, unsigned op)
+glsl_to_tgsi_visitor::visit_generic_intrinsic(ir_call *ir, enum tgsi_opcode op)
  {
     ir->return_deref->accept(this);
     st_dst_reg dst = st_dst_reg(this->result);
@@ -3911,6 +3993,8 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
     case ir_intrinsic_generic_atomic_max:
     case ir_intrinsic_generic_atomic_exchange:
     case ir_intrinsic_generic_atomic_comp_swap:
+   case ir_intrinsic_begin_invocation_interlock:
+   case ir_intrinsic_end_invocation_interlock:
        unreachable("Invalid intrinsic");
     }
  }
@@ -4017,7 +4101,11 @@ glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir,
  st_src_reg
  glsl_to_tgsi_visitor::canonicalize_gather_offset(st_src_reg offset)
  {
-   if (offset.reladdr || offset.reladdr2) {
+   if (offset.reladdr || offset.reladdr2 ||
+       offset.has_index2 ||
+       offset.file == PROGRAM_UNIFORM ||
+       offset.file == PROGRAM_CONSTANT ||
+       offset.file == PROGRAM_STATE_VAR) {
        st_src_reg tmp = get_temp(glsl_type::ivec2_type);
        st_dst_reg tmp_dst = st_dst_reg(tmp);
        tmp_dst.writemask = WRITEMASK_XY;
@@ -4036,7 +4124,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
     st_src_reg levels_src, reladdr;
     st_dst_reg result_dst, coord_dst, cube_sc_dst;
     glsl_to_tgsi_instruction *inst = NULL;
-   unsigned opcode = TGSI_OPCODE_NOP;
+   enum tgsi_opcode opcode = TGSI_OPCODE_NOP;
     const glsl_type *sampler_type = ir->sampler->type;
     unsigned sampler_array_size = 1, sampler_base = 0;
     bool is_cube_array = false, is_cube_shadow = false;
@@ -4228,9 +4316,9 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
        }
     }
  
-   /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
-    * comparator was put in the correct place (and projected) by the code,
-    * above, that handles by-hand projection.
+   /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the
+    * shadow comparator was put in the correct place (and projected) by the
+    * code, above, that handles by-hand projection.
      */
     if (ir->shadow_comparator && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
        /* Slot the shadow value in as the second to last component of the
@@ -4330,9 +4418,11 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
  
     if (ir->offset) {
        if (!inst->tex_offsets)
-         inst->tex_offsets = rzalloc_array(inst, st_src_reg, MAX_GLSL_TEXTURE_OFFSET);
+         inst->tex_offsets = rzalloc_array(inst, st_src_reg,
+                                           MAX_GLSL_TEXTURE_OFFSET);
  
-      for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET && offset[i].file != PROGRAM_UNDEFINED; i++)
+      for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET &&
+                  offset[i].file != PROGRAM_UNDEFINED; i++)
           inst->tex_offsets[i] = offset[i];
        inst->tex_offset_num_offset = i;
     }
@@ -4377,7 +4467,7 @@ glsl_to_tgsi_visitor::visit(ir_discard *ir)
  void
  glsl_to_tgsi_visitor::visit(ir_if *ir)
  {
-   unsigned if_opcode;
+   enum tgsi_opcode if_opcode;
     glsl_to_tgsi_instruction *if_inst;
  
     ir->condition->accept(this);
@@ -4630,7 +4720,8 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
            && !(inst->dst[0].writemask & prevWriteMask)
            && inst->src[2].file == inst->dst[0].file
            && inst->src[2].index == inst->dst[0].index
-          && inst->dst[0].writemask == get_src_arg_mask(inst->dst[0], inst->src[2])) {
+          && inst->dst[0].writemask ==
+             get_src_arg_mask(inst->dst[0], inst->src[2])) {
  
           inst->op = TGSI_OPCODE_MOV;
           inst->info = tgsi_get_opcode_info(inst->op);
@@ -4701,7 +4792,7 @@ glsl_to_tgsi_visitor::get_first_temp_write(int *first_writes)
        }
  
        if (inst->op == TGSI_OPCODE_BGNLOOP) {
-         if(depth++ == 0)
+         if (depth++ == 0)
              loop_start = i;
        } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
           if (--depth == 0)
@@ -4733,7 +4824,7 @@ glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads)
           }
        }
        if (inst->op == TGSI_OPCODE_BGNLOOP) {
-         if(depth++ == 0)
+         if (depth++ == 0)
              loop_start = i;
        } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
           if (--depth == 0)
@@ -4768,7 +4859,7 @@ glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *
              last_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : -2;
        }
        if (inst->op == TGSI_OPCODE_BGNLOOP) {
-         if(depth++ == 0)
+         if (depth++ == 0)
              loop_start = i;
        } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
           if (--depth == 0) {
@@ -5088,7 +5179,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void)
                    write_level[4 * r + c] = level-1;
              }
           }
-         if(inst->op == TGSI_OPCODE_ENDIF)
+         if (inst->op == TGSI_OPCODE_ENDIF)
              --level;
           break;
  
@@ -5253,7 +5344,7 @@ glsl_to_tgsi_visitor::merge_two_dsts(void)
           defined = 0;
  
        inst2 = (glsl_to_tgsi_instruction *) inst->next;
-      do {
+      while (!inst2->is_tail_sentinel()) {
           if (inst->op == inst2->op &&
               inst2->dst[defined].file == PROGRAM_UNDEFINED &&
               inst->src[0].file == inst2->src[0].file &&
@@ -5262,9 +5353,9 @@ glsl_to_tgsi_visitor::merge_two_dsts(void)
               inst->src[0].swizzle == inst2->src[0].swizzle)
              break;
           inst2 = (glsl_to_tgsi_instruction *) inst2->next;
-      } while (inst2);
+      }
  
-      if (!inst2) {
+      if (inst2->is_tail_sentinel()) {
           /* Undefined destinations are not allowed, substitute with an unused
            * temporary register.
            */
@@ -5288,7 +5379,6 @@ glsl_to_tgsi_visitor::merge_two_dsts(void)
  void
  glsl_to_tgsi_visitor::merge_registers(void)
  {
-   assert(need_uarl);
     struct lifetime *lifetimes =
           rzalloc_array(mem_ctx, struct lifetime, this->next_temp);
  
@@ -5369,12 +5459,12 @@ struct st_translate {
     const ubyte *inputMapping;
     const ubyte *outputMapping;
  
-   unsigned procType;  /**< PIPE_SHADER_VERTEX/FRAGMENT */
+   enum pipe_shader_type procType;  /**< PIPE_SHADER_VERTEX/FRAGMENT */
     bool need_uarl;
  };
  
  /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
-unsigned
+enum tgsi_semantic
  _mesa_sysval_to_semantic(unsigned sysval)
  {
     switch (sysval) {
@@ -5464,12 +5554,11 @@ _mesa_sysval_to_semantic(unsigned sysval)
  static struct ureg_src
  emit_immediate(struct st_translate *t,
                 gl_constant_value values[4],
-               int type, int size)
+               GLenum type, int size)
  {
     struct ureg_program *ureg = t->ureg;
  
-   switch(type)
-   {
+   switch (type) {
     case GL_FLOAT:
        return ureg_DECL_immediate(ureg, &values[0].f, size);
     case GL_DOUBLE:
@@ -5498,7 +5587,7 @@ dst_register(struct st_translate *t, gl_register_file file, unsigned index,
  {
     unsigned array;
  
-   switch(file) {
+   switch (file) {
     case PROGRAM_UNDEFINED:
        return ureg_dst_undef();
  
@@ -5547,7 +5636,9 @@ dst_register(struct st_translate *t, gl_register_file file, unsigned index,
           return t->outputs[t->outputMapping[index]];
        }
        else {
-         struct inout_decl *decl = find_inout_array(t->output_decls, t->num_output_decls, array_id);
+         struct inout_decl *decl =
+            find_inout_array(t->output_decls,
+                             t->num_output_decls, array_id);
           unsigned mesa_index = decl->mesa_index;
           int slot = t->outputMapping[mesa_index];
  
@@ -5626,18 +5717,20 @@ translate_src(struct st_translate *t, const st_src_reg *src_reg)
     int index = src_reg->index;
     int double_reg2 = src_reg->double_reg2 ? 1 : 0;
  
-   switch(src_reg->file) {
+   switch (src_reg->file) {
     case PROGRAM_UNDEFINED:
        src = ureg_imm4f(t->ureg, 0, 0, 0, 0);
        break;
  
     case PROGRAM_TEMPORARY:
     case PROGRAM_ARRAY:
-      src = ureg_src(dst_register(t, src_reg->file, src_reg->index, src_reg->array_id));
+      src = ureg_src(dst_register(t, src_reg->file, src_reg->index,
+                                  src_reg->array_id));
        break;
  
     case PROGRAM_OUTPUT: {
-      struct ureg_dst dst = dst_register(t, src_reg->file, src_reg->index, src_reg->array_id);
+      struct ureg_dst dst = dst_register(t, src_reg->file, src_reg->index,
+                                         src_reg->array_id);
        assert(dst.WriteMask != 0);
        unsigned shift = ffs(dst.WriteMask) - 1;
        src = ureg_swizzle(ureg_src(dst),
@@ -5678,7 +5771,8 @@ translate_src(struct st_translate *t, const st_src_reg *src_reg)
           src = t->inputs[t->inputMapping[index] + double_reg2];
        }
        else {
-         struct inout_decl *decl = find_inout_array(t->input_decls, t->num_input_decls,
+         struct inout_decl *decl = find_inout_array(t->input_decls,
+                                                    t->num_input_decls,
                                                      src_reg->array_id);
           unsigned mesa_index = decl->mesa_index;
           int slot = t->inputMapping[mesa_index];
@@ -5777,7 +5871,7 @@ compile_tgsi_instruction(struct st_translate *t,
  
     int num_dst;
     int num_src;
-   unsigned tex_target = 0;
+   enum tgsi_texture_type tex_target = TGSI_TEXTURE_BUFFER;
  
     num_dst = num_inst_dst_regs(inst);
     num_src = num_inst_src_regs(inst);
@@ -5790,7 +5884,7 @@ compile_tgsi_instruction(struct st_translate *t,
     for (i = 0; i < num_src; i++)
        src[i] = translate_src(t, &inst->src[i]);
  
-   switch(inst->op) {
+   switch (inst->op) {
     case TGSI_OPCODE_BGNLOOP:
     case TGSI_OPCODE_ELSE:
     case TGSI_OPCODE_ENDLOOP:
@@ -5938,7 +6032,7 @@ emit_wpos_adjustment(struct gl_context *ctx,
      * where T = INPUT[WPOS] is inverted by Y.
      */
     struct ureg_src wpostrans = ureg_DECL_constant(ureg, wpos_transform_const);
-   struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
+   struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
     struct ureg_src *wpos =
        ctx->Const.GLSLFragCoordIsSysVal ?
           &t->systemValues[SYSTEM_VALUE_FRAG_COORD] :
@@ -5968,7 +6062,7 @@ emit_wpos_adjustment(struct gl_context *ctx,
     } else {
        /* MOV wpos_temp, input[wpos]
         */
-      ureg_MOV( ureg, wpos_temp, wpos_input );
+      ureg_MOV(ureg, wpos_temp, wpos_input);
     }
  
     /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
@@ -5977,19 +6071,19 @@ emit_wpos_adjustment(struct gl_context *ctx,
     if (invert) {
        /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
         */
-      ureg_MAD( ureg,
-                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
-                wpos_input,
-                ureg_scalar(wpostrans, 0),
-                ureg_scalar(wpostrans, 1));
+      ureg_MAD(ureg,
+               ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
+               wpos_input,
+               ureg_scalar(wpostrans, 0),
+               ureg_scalar(wpostrans, 1));
     } else {
        /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
         */
-      ureg_MAD( ureg,
-                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
-                wpos_input,
-                ureg_scalar(wpostrans, 2),
-                ureg_scalar(wpostrans, 3));
+      ureg_MAD(ureg,
+               ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
+               wpos_input,
+               ureg_scalar(wpostrans, 2),
+               ureg_scalar(wpostrans, 3));
     }
  
     /* Use wpos_temp as position input from here on:
@@ -6163,7 +6257,7 @@ sort_inout_decls_by_slot(struct inout_decl *decls,
     std::sort(decls, decls + count, sorter);
  }
  
-static unsigned
+static enum tgsi_interpolate_mode
  st_translate_interp(enum glsl_interp_mode glsl_qual, GLuint varying)
  {
     switch (glsl_qual) {
@@ -6205,7 +6299,7 @@ st_translate_interp(enum glsl_interp_mode glsl_qual, GLuint varying)
  extern "C" enum pipe_error
  st_translate_program(
     struct gl_context *ctx,
-   uint procType,
+   enum pipe_shader_type procType,
     struct ureg_program *ureg,
     glsl_to_tgsi_visitor *program,
     const struct gl_program *proginfo,
@@ -6230,6 +6324,17 @@ st_translate_program(
     assert(numInputs <= ARRAY_SIZE(t->inputs));
     assert(numOutputs <= ARRAY_SIZE(t->outputs));
  
+   ASSERT_BITFIELD_SIZE(st_src_reg, type, GLSL_TYPE_ERROR);
+   ASSERT_BITFIELD_SIZE(st_dst_reg, type, GLSL_TYPE_ERROR);
+   ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, tex_type, GLSL_TYPE_ERROR);
+   ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, image_format, PIPE_FORMAT_COUNT);
+   ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, tex_target,
+                        (gl_texture_index) (NUM_TEXTURE_TARGETS - 1));
+   ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, image_format,
+                        (enum pipe_format) (PIPE_FORMAT_COUNT - 1));
+   ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, op,
+                        (enum tgsi_opcode) (TGSI_OPCODE_LAST - 1));
+
     t = CALLOC_STRUCT(st_translate);
     if (!t) {
        ret = PIPE_ERROR_OUT_OF_MEMORY;
@@ -6271,19 +6376,20 @@ st_translate_program(
                 tgsi_usage_mask = TGSI_WRITEMASK_XYZW;
           }
  
-         unsigned interp_mode = 0;
-         unsigned interp_location = 0;
+         enum tgsi_interpolate_mode interp_mode = TGSI_INTERPOLATE_CONSTANT;
+         enum tgsi_interpolate_loc interp_location = TGSI_INTERPOLATE_LOC_CENTER;
           if (procType == PIPE_SHADER_FRAGMENT) {
              assert(interpMode);
              interp_mode = interpMode[slot] != TGSI_INTERPOLATE_COUNT ?
-               interpMode[slot] :
+               (enum tgsi_interpolate_mode) interpMode[slot] :
                 st_translate_interp(decl->interp, inputSlotToAttr[slot]);
  
-            interp_location = decl->interp_loc;
+            interp_location = (enum tgsi_interpolate_loc) decl->interp_loc;
           }
  
           src = ureg_DECL_fs_input_cyl_centroid_layout(ureg,
-                  inputSemanticName[slot], inputSemanticIndex[slot],
+                  (enum tgsi_semantic) inputSemanticName[slot],
+                  inputSemanticIndex[slot],
                    interp_mode, 0, interp_location, slot, tgsi_usage_mask,
                    decl->array_id, decl->size);
  
@@ -6337,16 +6443,18 @@ st_translate_program(
           }
  
           dst = ureg_DECL_output_layout(ureg,
-                     outputSemanticName[slot], outputSemanticIndex[slot],
+                     (enum tgsi_semantic) outputSemanticName[slot],
+                     outputSemanticIndex[slot],
                       decl->gs_out_streams,
-                     slot, tgsi_usage_mask, decl->array_id, decl->size);
-
+                     slot, tgsi_usage_mask, decl->array_id, decl->size, decl->invariant);
+         dst.Invariant = decl->invariant;
           for (unsigned j = 0; j < decl->size; ++j) {
              if (t->outputs[slot + j].File != TGSI_FILE_OUTPUT) {
                 /* The ArrayID is set up in dst_register */
                 t->outputs[slot + j] = dst;
                 t->outputs[slot + j].ArrayID = 0;
                 t->outputs[slot + j].Index += j;
+               t->outputs[slot + j].Invariant = decl->invariant;
              }
           }
        }
@@ -6435,11 +6543,11 @@ st_translate_program(
     /* Declare misc input registers
      */
     {
-      GLbitfield sysInputs = proginfo->info.system_values_read;
+      GLbitfield64 sysInputs = proginfo->info.system_values_read;
  
        for (i = 0; sysInputs; i++) {
-         if (sysInputs & (1 << i)) {
-            unsigned semName = _mesa_sysval_to_semantic(i);
+         if (sysInputs & (1ull << i)) {
+            enum tgsi_semantic semName = _mesa_sysval_to_semantic(i);
  
              t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);
  
@@ -6459,7 +6567,8 @@ st_translate_program(
                 (void) pscreen;
                 if (!ctx->Const.NativeIntegers) {
                    struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
-                  ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]);
+                  ureg_U2F(t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X),
+                           t->systemValues[i]);
                    t->systemValues[i] = ureg_scalar(ureg_src(temp), 0);
                 }
              }
@@ -6469,7 +6578,7 @@ st_translate_program(
                 emit_wpos(st_context(ctx), t, proginfo, ureg,
                           program->wpos_transform_const);
  
-            sysInputs &= ~(1 << i);
+            sysInputs &= ~(1ull << i);
           }
        }
     }
@@ -6493,6 +6602,8 @@ st_translate_program(
        t->num_constants = proginfo->Parameters->NumParameters;
  
        for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
+         unsigned pvo = proginfo->Parameters->ParameterValueOffset[i];
+
           switch (proginfo->Parameters->Parameters[i].Type) {
           case PROGRAM_STATE_VAR:
           case PROGRAM_UNIFORM:
@@ -6510,7 +6621,7 @@ st_translate_program(
                 t->constants[i] = ureg_DECL_constant(ureg, i);
              else
                 t->constants[i] = emit_immediate(t,
-                                                proginfo->Parameters->ParameterValues[i],
+                                                proginfo->Parameters->ParameterValues + pvo,
                                                  proginfo->Parameters->Parameters[i].DataType,
                                                  4);
              break;
@@ -6550,12 +6661,13 @@ st_translate_program(
     /* texture samplers */
     for (i = 0; i < frag_const->MaxTextureImageUnits; i++) {
        if (program->samplers_used & (1u << i)) {
-         unsigned type = st_translate_texture_type(program->sampler_types[i]);
+         enum tgsi_return_type type =
+            st_translate_texture_type(program->sampler_types[i]);
  
           t->samplers[i] = ureg_DECL_sampler(ureg, i);
  
-         ureg_DECL_sampler_view( ureg, i, program->sampler_targets[i],
-                                 type, type, type, type );
+         ureg_DECL_sampler_view(ureg, i, program->sampler_targets[i],
+                                type, type, type, type);
        }
     }
  
@@ -6564,7 +6676,7 @@ st_translate_program(
        struct gl_program *prog = program->prog;
  
        if (!st_context(ctx)->has_hw_atomics) {
-        for (i = 0; i < prog->info.num_abos; i++) {
+         for (i = 0; i < prog->info.num_abos; i++) {
              unsigned index = prog->sh.AtomicBuffers[i]->Binding;
              assert(index < frag_const->MaxAtomicBuffers);
              t->buffers[index] = ureg_DECL_buffer(ureg, index, true);
@@ -6581,7 +6693,10 @@ st_translate_program(
  
        assert(prog->info.num_ssbos <= frag_const->MaxShaderStorageBlocks);
        for (i = 0; i < prog->info.num_ssbos; i++) {
-         unsigned index = frag_const->MaxAtomicBuffers + i;
+         unsigned index = i;
+         if (!st_context(ctx)->has_hw_atomics)
+            index += frag_const->MaxAtomicBuffers;
+
           t->buffers[index] = ureg_DECL_buffer(ureg, index, false);
        }
     }
@@ -6618,6 +6733,8 @@ st_translate_program(
           }
        }
        break;
+   default:
+      ; /* nothing - silence compiler warning */
     }
  
  out:
@@ -6748,7 +6865,7 @@ get_mesa_program_tgsi(struct gl_context *ctx,
     _mesa_copy_linked_program_data(shader_program, shader);
     shrink_array_declarations(v->inputs, v->num_inputs,
                               &prog->info.inputs_read,
-                             prog->info.double_inputs_read,
+                             prog->info.vs.double_inputs_read,
                               &prog->info.patch_inputs_read);
     shrink_array_declarations(v->outputs, v->num_outputs,
                               &prog->info.outputs_written, 0ULL,
@@ -6762,8 +6879,8 @@ get_mesa_program_tgsi(struct gl_context *ctx,
     /* This must be done before the uniform storage is associated. */
     if (shader->Stage == MESA_SHADER_FRAGMENT &&
         (prog->info.inputs_read & VARYING_BIT_POS ||
-        prog->info.system_values_read & (1 << SYSTEM_VALUE_FRAG_COORD))) {
-      static const gl_state_index wposTransformState[STATE_LENGTH] = {
+        prog->info.system_values_read & (1ull << SYSTEM_VALUE_FRAG_COORD))) {
+      static const gl_state_index16 wposTransformState[STATE_LENGTH] = {
           STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
        };
  
@@ -6881,12 +6998,18 @@ extern "C" {
  GLboolean
  st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
  {
+   struct pipe_screen *pscreen = ctx->st->pipe->screen;
+
+   enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
+      pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX,
+                                PIPE_SHADER_CAP_PREFERRED_IR);
+   bool use_nir = preferred_ir == PIPE_SHADER_IR_NIR;
+
     /* Return early if we are loading the shader from on-disk cache */
-   if (st_load_tgsi_from_disk_cache(ctx, prog)) {
+   if (st_load_ir_from_disk_cache(ctx, prog, use_nir)) {
        return GL_TRUE;
     }
  
-   struct pipe_screen *pscreen = ctx->st->pipe->screen;
     assert(prog->data->LinkStatus);
  
     for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
@@ -6948,7 +7071,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
        do_mat_op_to_vec(ir);
  
        if (stage == MESA_SHADER_FRAGMENT)
-         lower_blend_equation_advanced(shader);
+         lower_blend_equation_advanced(
+            shader, ctx->Extensions.KHR_blend_equation_advanced_coherent);
  
        lower_instructions(ir,
                           MOD_TO_FLOOR |
@@ -7006,29 +7130,27 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
           } while (progress);
        }
  
+      /* Do this again to lower ir_binop_vector_extract introduced
+       * by optimization passes.
+       */
+      do_vec_index_to_cond_assign(ir);
+
        validate_ir_tree(ir);
     }
  
     build_program_resource_list(ctx, prog);
  
+   if (use_nir)
+      return st_link_nir(ctx, prog);
+
     for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
        struct gl_linked_shader *shader = prog->_LinkedShaders[i];
        if (shader == NULL)
           continue;
  
-      enum pipe_shader_type ptarget =
-         pipe_shader_type_from_mesa(shader->Stage);
-      enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
-         pscreen->get_shader_param(pscreen, ptarget,
-                                   PIPE_SHADER_CAP_PREFERRED_IR);
-
-      struct gl_program *linked_prog = NULL;
-      if (preferred_ir == PIPE_SHADER_IR_NIR) {
-         linked_prog = st_nir_get_mesa_program(ctx, prog, shader);
-      } else {
-         linked_prog = get_mesa_program_tgsi(ctx, prog, shader);
-         st_set_prog_affected_state_flags(linked_prog);
-      }
+      struct gl_program *linked_prog =
+         get_mesa_program_tgsi(ctx, prog, shader);
+      st_set_prog_affected_state_flags(linked_prog);
  
        if (linked_prog) {
           if (!ctx->Driver.ProgramStringNotify(ctx,