#include "st_format.h"
#include "st_glsl_types.h"
#include "st_nir.h"
+#include "st_shader_cache.h"
#include <algorithm>
this->index = index;
this->swizzle = swizzle_for_type(type, component);
this->negate = 0;
+ this->abs = 0;
this->index2D = 0;
this->type = type ? type->base_type : GLSL_TYPE_ERROR;
this->reladdr = NULL;
this->index2D = 0;
this->swizzle = SWIZZLE_XYZW;
this->negate = 0;
+ this->abs = 0;
this->reladdr = NULL;
this->reladdr2 = NULL;
this->has_index2 = false;
this->index2D = index2D;
this->swizzle = SWIZZLE_XYZW;
this->negate = 0;
+ this->abs = 0;
this->reladdr = NULL;
this->reladdr2 = NULL;
this->has_index2 = false;
this->index2D = 0;
this->swizzle = 0;
this->negate = 0;
+ this->abs = 0;
this->reladdr = NULL;
this->reladdr2 = NULL;
this->has_index2 = false;
int16_t index2D;
uint16_t swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
int negate:4; /**< NEGATE_XYZW mask from mesa */
- enum glsl_base_type type:4; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
+ unsigned abs:1;
+ enum glsl_base_type type:5; /**< GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
unsigned has_index2:1;
gl_register_file file:5; /**< PROGRAM_* from Mesa */
/** Register index should be offset by the integer in this reg. */
st_src_reg *reladdr;
st_src_reg *reladdr2;
+
+ st_src_reg get_abs()
+ {
+ st_src_reg reg = *this;
+ reg.negate = 0;
+ reg.abs = 1;
+ return reg;
+ }
};
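/* Illustrative sketch, not part of the change: the new abs bit replaces the
 * dedicated TGSI ABS opcode. get_abs() returns a copy of the register with
 * abs set and negate cleared (|-x| == |x|), so an absolute value becomes a
 * plain MOV with a source modifier:
 *
 *    emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0].get_abs());
 *
 * translate_src() then applies ureg_abs() when it sees the bit set.
 */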
class st_dst_reg {
int16_t index2D;
gl_register_file file:5; /**< PROGRAM_* from Mesa */
unsigned writemask:4; /**< Bitfield of WRITEMASK_[XYZW] */
- enum glsl_base_type type:4; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
+ enum glsl_base_type type:5; /**< GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
unsigned has_index2:1;
unsigned array_id:10;
this->index = reg.index;
this->swizzle = SWIZZLE_XYZW;
this->negate = 0;
+ this->abs = 0;
this->reladdr = reg.reladdr;
this->index2D = reg.index2D;
this->reladdr2 = reg.reladdr2;
unsigned sampler_base:5;
unsigned sampler_array_size:6; /**< 1-based size of sampler array, 1 if not array */
unsigned tex_target:4; /**< One of TEXTURE_*_INDEX */
- glsl_base_type tex_type:4;
+ glsl_base_type tex_type:5;
unsigned tex_shadow:1;
unsigned image_format:9;
unsigned tex_offset_num_offset:3;
unsigned array_id; /* TGSI ArrayID; 1-based: 0 means not an array */
unsigned size;
unsigned interp_loc;
+ unsigned gs_out_streams; /**< stream index per output component, 2 bits each */
enum glsl_interp_mode interp;
enum glsl_base_type base_type;
ubyte usage_mask; /* GLSL-style usage-mask, i.e. single bit per double */
ralloc_vasprintf_append(&prog->data->InfoLog, fmt, args);
va_end(args);
- prog->data->LinkStatus = GL_FALSE;
+ prog->data->LinkStatus = linking_failure;
}
static int
if (is_resource_instruction(op))
type = src1.type;
+ else if (src0.type == GLSL_TYPE_INT64 || src1.type == GLSL_TYPE_INT64)
+ type = GLSL_TYPE_INT64;
+ else if (src0.type == GLSL_TYPE_UINT64 || src1.type == GLSL_TYPE_UINT64)
+ type = GLSL_TYPE_UINT64;
else if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE)
type = GLSL_TYPE_DOUBLE;
else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
else if (native_integers)
type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
+#define case7(c, f, i, u, d, i64, ui64) \
+ case TGSI_OPCODE_##c: \
+ if (type == GLSL_TYPE_UINT64) \
+ op = TGSI_OPCODE_##ui64; \
+ else if (type == GLSL_TYPE_INT64) \
+ op = TGSI_OPCODE_##i64; \
+ else if (type == GLSL_TYPE_DOUBLE) \
+ op = TGSI_OPCODE_##d; \
+ else if (type == GLSL_TYPE_INT) \
+ op = TGSI_OPCODE_##i; \
+ else if (type == GLSL_TYPE_UINT) \
+ op = TGSI_OPCODE_##u; \
+ else \
+ op = TGSI_OPCODE_##f; \
+ break;
#define case5(c, f, i, u, d) \
case TGSI_OPCODE_##c: \
if (type == GLSL_TYPE_DOUBLE) \
break;
#define case3(f, i, u) case4(f, f, i, u)
-#define case4d(f, i, u, d) case5(f, f, i, u, d)
+#define case6d(f, i, u, d, i64, u64) case7(f, f, i, u, d, i64, u64)
#define case3fid(f, i, d) case5(f, f, i, i, d)
+#define case3fid64(f, i, d, i64) case7(f, f, i, i, d, i64, i64)
#define case2fi(f, i) case4(f, f, i, i)
#define case2iu(i, u) case4(i, LAST, i, u)
-#define casecomp(c, f, i, u, d) \
+#define case2iu64(i, i64) case7(i, LAST, i, i, LAST, i64, i64)
+#define case4iu64(i, u, i64, u64) case7(i, LAST, i, u, LAST, i64, u64)
+
+#define casecomp(c, f, i, u, d, i64, ui64) \
case TGSI_OPCODE_##c: \
- if (type == GLSL_TYPE_DOUBLE) \
+ if (type == GLSL_TYPE_INT64) \
+ op = TGSI_OPCODE_##i64; \
+ else if (type == GLSL_TYPE_UINT64) \
+ op = TGSI_OPCODE_##ui64; \
+ else if (type == GLSL_TYPE_DOUBLE) \
op = TGSI_OPCODE_##d; \
else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE) \
op = TGSI_OPCODE_##i; \
break;
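/* Illustrative expansion, not in the source: case3fid64(ADD, UADD, DADD,
 * U64ADD) forwards to case7(ADD, ADD, UADD, UADD, DADD, U64ADD, U64ADD),
 * which expands to:
 *
 *    case TGSI_OPCODE_ADD:
 *       if (type == GLSL_TYPE_UINT64)
 *          op = TGSI_OPCODE_U64ADD;
 *       else if (type == GLSL_TYPE_INT64)
 *          op = TGSI_OPCODE_U64ADD;
 *       else if (type == GLSL_TYPE_DOUBLE)
 *          op = TGSI_OPCODE_DADD;
 *       else if (type == GLSL_TYPE_INT)
 *          op = TGSI_OPCODE_UADD;
 *       else if (type == GLSL_TYPE_UINT)
 *          op = TGSI_OPCODE_UADD;
 *       else
 *          op = TGSI_OPCODE_ADD;
 *       break;
 *
 * Addition is sign-agnostic in two's complement, which is why one U64ADD
 * opcode serves both the int64 and uint64 slots.
 */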
switch(op) {
- case3fid(ADD, UADD, DADD);
- case3fid(MUL, UMUL, DMUL);
+ case3fid64(ADD, UADD, DADD, U64ADD);
+ case3fid64(MUL, UMUL, DMUL, U64MUL);
case3fid(MAD, UMAD, DMAD);
case3fid(FMA, UMAD, DFMA);
- case3(DIV, IDIV, UDIV);
- case4d(MAX, IMAX, UMAX, DMAX);
- case4d(MIN, IMIN, UMIN, DMIN);
- case2iu(MOD, UMOD);
+ case6d(DIV, IDIV, UDIV, DDIV, I64DIV, U64DIV);
+ case6d(MAX, IMAX, UMAX, DMAX, I64MAX, U64MAX);
+ case6d(MIN, IMIN, UMIN, DMIN, I64MIN, U64MIN);
+ case4iu64(MOD, UMOD, I64MOD, U64MOD);
- casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ);
- casecomp(SNE, FSNE, USNE, USNE, DSNE);
- casecomp(SGE, FSGE, ISGE, USGE, DSGE);
- casecomp(SLT, FSLT, ISLT, USLT, DSLT);
+ casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ, U64SEQ, U64SEQ);
+ casecomp(SNE, FSNE, USNE, USNE, DSNE, U64SNE, U64SNE);
+ casecomp(SGE, FSGE, ISGE, USGE, DSGE, I64SGE, U64SGE);
+ casecomp(SLT, FSLT, ISLT, USLT, DSLT, I64SLT, U64SLT);
- case2iu(ISHR, USHR);
+ case2iu64(SHL, U64SHL);
+ case4iu64(ISHR, USHR, I64SHR, U64SHR);
- case3fid(SSG, ISSG, DSSG);
- case3fid(ABS, IABS, DABS);
+ case3fid64(SSG, ISSG, DSSG, I64SSG);
case2iu(IBFE, UBFE);
case2iu(IMSB, UMSB);
int index = 0;
immediate_storage *entry;
- int size32 = size * (datatype == GL_DOUBLE ? 2 : 1);
+ int size32 = size * ((datatype == GL_DOUBLE ||
+ datatype == GL_INT64_ARB ||
+ datatype == GL_UNSIGNED_INT64_ARB) ? 2 : 1);
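/* Worked example, assuming size counts vector components: immediates are
 * stored in 32-bit slots, so 64-bit types take two slots per component.
 * An i64vec2 immediate has size = 2 and datatype = GL_INT64_ARB, giving
 * size32 = 2 * 2 = 4 slots.
 */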
int i;
/* Search immediate storage to see if we already have an identical
src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
src.reladdr = NULL;
src.negate = 0;
+ src.abs = 0;
if (!options->EmitNoIndirectTemp && type_has_array_or_matrix(type)) {
if (next_array >= max_num_arrays) {
}
break;
case ir_unop_neg:
- if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
+ if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64)
+ emit_asm(ir, TGSI_OPCODE_I64NEG, result_dst, op[0]);
+ else if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
else if (result_dst.type == GLSL_TYPE_DOUBLE)
emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]);
emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
break;
case ir_unop_abs:
- emit_asm(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
+ if (result_dst.type == GLSL_TYPE_FLOAT)
+ emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0].get_abs());
+ else if (result_dst.type == GLSL_TYPE_DOUBLE)
+ emit_asm(ir, TGSI_OPCODE_DABS, result_dst, op[0]);
+ else if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64)
+ emit_asm(ir, TGSI_OPCODE_I64ABS, result_dst, op[0]);
+ else
+ emit_asm(ir, TGSI_OPCODE_IABS, result_dst, op[0]);
break;
case ir_unop_sign:
emit_asm(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
break;
case ir_binop_sub:
- emit_asm(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
+ op[1].negate = ~op[1].negate;
+ emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
break;
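/* Illustrative only: with TGSI_OPCODE_SUB gone, a - b is emitted as
 * a + (-b). negate is the 4-bit per-channel NEGATE_XYZW mask, and ~mask
 * flips every channel, so channels of b that were already negated become
 * positive again (e.g. 0b0000 -> 0b1111, 0b0101 -> 0b1010).
 */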
case ir_binop_mul:
emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
break;
case ir_binop_div:
- if (result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_DOUBLE)
- assert(!"not reached: should be handled by ir_div_to_mul_rcp");
- else
- emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
+ emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
break;
case ir_binop_mod:
if (result_dst.type == GLSL_TYPE_FLOAT)
* we want, I choose to use ABS to match DX9 and pre-GLSL RSQ
* behavior.
*/
- emit_scalar(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
- emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, result_src);
+ emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0].get_abs());
emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, result_src);
}
break;
/* fallthrough to next case otherwise */
case ir_unop_i2u:
case ir_unop_u2i:
+ case ir_unop_i642u64:
+ case ir_unop_u642i64:
/* Converting between signed and unsigned integers is a no-op. */
result_src = op[0];
result_src.type = result_dst.type;
case ir_unop_bitcast_f2i:
case ir_unop_bitcast_f2u:
/* Make sure we don't propagate the negate modifier to integer opcodes. */
- if (op[0].negate)
+ if (op[0].negate || op[0].abs)
emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
else
result_src = op[0];
else
emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
break;
+ case ir_unop_bitcast_u642d:
+ case ir_unop_bitcast_i642d:
+ result_src = op[0];
+ result_src.type = GLSL_TYPE_DOUBLE;
+ break;
+ case ir_unop_bitcast_d2i64:
+ result_src = op[0];
+ result_src.type = GLSL_TYPE_INT64;
+ break;
+ case ir_unop_bitcast_d2u64:
+ result_src = op[0];
+ result_src.type = GLSL_TYPE_UINT64;
+ break;
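/* These bitcasts emit no instruction: doubles and 64-bit integers share
 * the same two-32-bit-channel storage layout in this backend, so retyping
 * the source register is sufficient.
 */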
case ir_unop_trunc:
emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
break;
cbuf.index = 0;
cbuf.reladdr = NULL;
cbuf.negate = 0;
+ cbuf.abs = 0;
assert(ir->type->is_vector() || ir->type->is_scalar());
break;
case ir_unop_unpack_double_2x32:
case ir_unop_pack_double_2x32:
+ case ir_unop_unpack_int_2x32:
+ case ir_unop_pack_int_2x32:
+ case ir_unop_unpack_uint_2x32:
+ case ir_unop_pack_uint_2x32:
emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
break;
case ir_unop_vote_eq:
emit_asm(ir, TGSI_OPCODE_VOTE_EQ, result_dst, op[0]);
break;
-
+ case ir_unop_u2i64:
+ case ir_unop_u2u64:
+ case ir_unop_b2i64: {
+ st_src_reg temp = get_temp(glsl_type::uvec4_type);
+ st_dst_reg temp_dst = st_dst_reg(temp);
+ unsigned orig_swz = op[0].swizzle;
+ /*
+ * To convert unsigned to 64-bit: zero the upper 32 bits (the Y/W
+ * channels) and copy the value into the lower 32 bits (the X/Z
+ * channels).
+ */
+ temp_dst.writemask = WRITEMASK_Y;
+ if (vector_elements > 1)
+ temp_dst.writemask |= WRITEMASK_W;
+ emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, st_src_reg_for_int(0));
+ temp_dst.writemask = WRITEMASK_X;
+ if (vector_elements > 1)
+ temp_dst.writemask |= WRITEMASK_Z;
+ op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 0), GET_SWZ(orig_swz, 0),
+ GET_SWZ(orig_swz, 1), GET_SWZ(orig_swz, 1));
+ if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64)
+ emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]);
+ else
+ emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], st_src_reg_for_int(1));
+ result_src = temp;
+ result_src.type = GLSL_TYPE_UINT64;
+ if (vector_elements > 2) {
+ /* Subtle: We rely on the fact that get_temp here returns the next
+ * TGSI temporary register directly after the temp register used for
+ * the first two components, so that the result gets picked up
+ * automatically.
+ */
+ st_src_reg temp = get_temp(glsl_type::uvec4_type);
+ st_dst_reg temp_dst = st_dst_reg(temp);
+ temp_dst.writemask = WRITEMASK_Y;
+ if (vector_elements > 3)
+ temp_dst.writemask |= WRITEMASK_W;
+ emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, st_src_reg_for_int(0));
+
+ temp_dst.writemask = WRITEMASK_X;
+ if (vector_elements > 3)
+ temp_dst.writemask |= WRITEMASK_Z;
+ op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 2), GET_SWZ(orig_swz, 2),
+ GET_SWZ(orig_swz, 3), GET_SWZ(orig_swz, 3));
+ if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64)
+ emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]);
+ else
+ emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], st_src_reg_for_int(1));
+ }
+ break;
+ }
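/* Illustrative layout, not in the source: 64-bit values occupy channel
 * pairs, low dword in X/Z and high dword in Y/W. For a uvec2 in r0.xy
 * converted to u64vec2, the block above emits roughly:
 *
 *    MOV r1.yw, 0          ; zero the high dwords
 *    MOV r1.xz, r0.xxyy    ; low dwords from the 32-bit source
 *
 * Elements 2 and 3 of wider vectors land in a second temporary allocated
 * immediately afterwards, as the "Subtle" comment above notes.
 */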
+ case ir_unop_i642i:
+ case ir_unop_u642i:
+ case ir_unop_u642u:
+ case ir_unop_i642u: {
+ st_src_reg temp = get_temp(glsl_type::uvec4_type);
+ st_dst_reg temp_dst = st_dst_reg(temp);
+ unsigned orig_swz = op[0].swizzle;
+ unsigned orig_idx = op[0].index;
+ int el;
+ temp_dst.writemask = WRITEMASK_X;
+
+ for (el = 0; el < vector_elements; el++) {
+ unsigned swz = GET_SWZ(orig_swz, el);
+ if (swz & 1)
+ op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z);
+ else
+ op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
+ if (swz > 2)
+ op[0].index = orig_idx + 1;
+ op[0].type = GLSL_TYPE_UINT;
+ temp_dst.writemask = WRITEMASK_X << el;
+ emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]);
+ }
+ result_src = temp;
+ if (ir->operation == ir_unop_u642u || ir->operation == ir_unop_i642u)
+ result_src.type = GLSL_TYPE_UINT;
+ else
+ result_src.type = GLSL_TYPE_INT;
+ break;
+ }
+ case ir_unop_i642b:
+ emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0], st_src_reg_for_int(0));
+ break;
+ case ir_unop_i642f:
+ emit_asm(ir, TGSI_OPCODE_I642F, result_dst, op[0]);
+ break;
+ case ir_unop_u642f:
+ emit_asm(ir, TGSI_OPCODE_U642F, result_dst, op[0]);
+ break;
+ case ir_unop_i642d:
+ emit_asm(ir, TGSI_OPCODE_I642D, result_dst, op[0]);
+ break;
+ case ir_unop_u642d:
+ emit_asm(ir, TGSI_OPCODE_U642D, result_dst, op[0]);
+ break;
+ case ir_unop_i2i64:
+ emit_asm(ir, TGSI_OPCODE_I2I64, result_dst, op[0]);
+ break;
+ case ir_unop_f2i64:
+ emit_asm(ir, TGSI_OPCODE_F2I64, result_dst, op[0]);
+ break;
+ case ir_unop_d2i64:
+ emit_asm(ir, TGSI_OPCODE_D2I64, result_dst, op[0]);
+ break;
+ case ir_unop_i2u64:
+ emit_asm(ir, TGSI_OPCODE_I2I64, result_dst, op[0]);
+ break;
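/* Note on the case above: I2I64 for ir_unop_i2u64 appears deliberate;
 * sign-extending the 32-bit value first matches C-style int -> uint64
 * conversion, so no unsigned variant is needed here.
 */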
+ case ir_unop_f2u64:
+ emit_asm(ir, TGSI_OPCODE_F2U64, result_dst, op[0]);
+ break;
+ case ir_unop_d2u64:
+ emit_asm(ir, TGSI_OPCODE_D2U64, result_dst, op[0]);
+ break;
+ /* these might be needed */
case ir_unop_pack_snorm_2x16:
case ir_unop_pack_unorm_2x16:
case ir_unop_pack_snorm_4x8:
decl->mesa_index = var->data.location + FRAG_RESULT_MAX * var->data.index;
decl->base_type = type_without_array->base_type;
decl->usage_mask = u_bit_consecutive(component, num_components);
+ if (var->data.stream & (1u << 31)) {
+ decl->gs_out_streams = var->data.stream & ~(1u << 31);
+ } else {
+ assert(var->data.stream < 4);
+ decl->gs_out_streams = 0;
+ for (unsigned i = 0; i < num_components; ++i)
+ decl->gs_out_streams |= var->data.stream << (2 * (component + i));
+ }
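/* Worked example: stream indices are packed two bits per component. For
 * var->data.stream == 2 covering components 1..2, the loop yields
 * (2 << 2) | (2 << 4) == 0b101000: stream 2 for components 1 and 2,
 * stream 0 elsewhere. The (1u << 31) flag marks a stream value that is
 * already packed in this per-component form.
 */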
if (is_inout_array(shader->Stage, var, &remove_array)) {
decl->array_id = num_output_arrays + 1;
else
decl->size = type_size(var->type);
- entry = new(mem_ctx) variable_storage(var,
- PROGRAM_OUTPUT,
- decl->mesa_index,
- decl->array_id);
+ if (var->data.fb_fetch_output) {
+ st_dst_reg dst = st_dst_reg(get_temp(var->type));
+ st_src_reg src = st_src_reg(PROGRAM_OUTPUT, decl->mesa_index,
+ var->type, component, decl->array_id);
+ emit_asm(NULL, TGSI_OPCODE_FBFETCH, dst, src);
+ entry = new(mem_ctx) variable_storage(var, dst.file, dst.index,
+ dst.array_id);
+ } else {
+ entry = new(mem_ctx) variable_storage(var,
+ PROGRAM_OUTPUT,
+ decl->mesa_index,
+ decl->array_id);
+ }
entry->component = component;
this->variables.push_tail(entry);
memcpy(&values[i * 2], &ir->value.d[i], sizeof(double));
}
break;
+ case GLSL_TYPE_INT64:
+ gl_type = GL_INT64_ARB;
+ for (i = 0; i < ir->type->vector_elements; i++) {
+ memcpy(&values[i * 2], &ir->value.i64[i], sizeof(int64_t));
+ }
+ break;
+ case GLSL_TYPE_UINT64:
+ gl_type = GL_UNSIGNED_INT64_ARB;
+ for (i = 0; i < ir->type->vector_elements; i++) {
+ memcpy(&values[i * 2], &ir->value.u64[i], sizeof(uint64_t));
+ }
+ break;
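/* Illustrative only: values[] holds 32-bit slots, so each 64-bit component
 * is memcpy'd across a slot pair, i.e. on a little-endian host:
 *
 *    values[2 * i]     = low dword of component i
 *    values[2 * i + 1] = high dword of component i
 */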
case GLSL_TYPE_UINT:
gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
for (i = 0; i < ir->type->vector_elements; i++) {
inst->resource = buffer;
if (access)
inst->buffer_access = access->value.u[0];
+
+ if (inst == this->instructions.get_head_raw())
+ break;
inst = (glsl_to_tgsi_instruction *)inst->get_prev();
- if (inst->op == TGSI_OPCODE_UADD)
+
+ if (inst->op == TGSI_OPCODE_UADD) {
+ if (inst == this->instructions.get_head_raw())
+ break;
inst = (glsl_to_tgsi_instruction *)inst->get_prev();
- } while (inst && inst->op == op && inst->resource.file == PROGRAM_UNDEFINED);
+ }
+ } while (inst->op == op && inst->resource.file == PROGRAM_UNDEFINED);
}
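/* The get_head_raw() checks above stop the backwards walk at the first
 * instruction; previously the loop could call get_prev() on the head and
 * then read the list sentinel through inst->op.
 */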
void
switch (ir->op) {
case ir_tex:
- opcode = (is_cube_array && ir->shadow_comparitor) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX;
+ opcode = (is_cube_array && ir->shadow_comparator) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX;
if (ir->offset) {
ir->offset->accept(this);
offset[0] = this->result;
* the shadow comparator value must also be projected.
*/
st_src_reg tmp_src = coord;
- if (ir->shadow_comparitor) {
+ if (ir->shadow_comparator) {
/* Slot the shadow value in as the second to last component of the
* coord.
*/
- ir->shadow_comparitor->accept(this);
+ ir->shadow_comparator->accept(this);
tmp_src = get_temp(glsl_type::vec4_type);
st_dst_reg tmp_dst = st_dst_reg(tmp_src);
* comparator was put in the correct place (and projected) by the code,
* above, that handles by-hand projection.
*/
- if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
+ if (ir->shadow_comparator && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
/* Slot the shadow value in as the second to last component of the
* coord.
*/
- ir->shadow_comparitor->accept(this);
+ ir->shadow_comparator->accept(this);
if (is_cube_array) {
cube_sc = get_temp(glsl_type::float_type);
} else if (opcode == TGSI_OPCODE_TEX2) {
inst = emit_asm(ir, opcode, result_dst, coord, cube_sc);
} else if (opcode == TGSI_OPCODE_TG4) {
- if (is_cube_array && ir->shadow_comparitor) {
+ if (is_cube_array && ir->shadow_comparator) {
inst = emit_asm(ir, opcode, result_dst, coord, cube_sc);
} else {
inst = emit_asm(ir, opcode, result_dst, coord, component);
} else
inst = emit_asm(ir, opcode, result_dst, coord);
- if (ir->shadow_comparitor)
+ if (ir->shadow_comparator)
inst->tex_shadow = GL_TRUE;
inst->resource.index = sampler_index;
inst->src[0].file != PROGRAM_ARRAY &&
!inst->src[0].reladdr &&
!inst->src[0].reladdr2 &&
- !inst->src[0].negate) {
+ !inst->src[0].negate &&
+ !inst->src[0].abs) {
for (int i = 0; i < 4; i++) {
if (inst->dst[0].writemask & (1 << i)) {
acp[4 * inst->dst[0].index + i] = inst;
return ureg_DECL_immediate(ureg, &values[0].f, size);
case GL_DOUBLE:
return ureg_DECL_immediate_f64(ureg, (double *)&values[0].f, size);
+ case GL_INT64_ARB:
+ return ureg_DECL_immediate_int64(ureg, (int64_t *)&values[0].f, size);
+ case GL_UNSIGNED_INT64_ARB:
+ return ureg_DECL_immediate_uint64(ureg, (uint64_t *)&values[0].f, size);
case GL_INT:
return ureg_DECL_immediate_int(ureg, &values[0].i, size);
case GL_UNSIGNED_INT:
GET_SWZ(src_reg->swizzle, 2) & 0x3,
GET_SWZ(src_reg->swizzle, 3) & 0x3);
+ if (src_reg->abs)
+ src = ureg_abs(src);
+
if ((src_reg->negate & 0xf) == NEGATE_XYZW)
src = ureg_negate(src);
dst = ureg_DECL_output_layout(ureg,
outputSemanticName[slot], outputSemanticIndex[slot],
+ decl->gs_out_streams,
slot, tgsi_usage_mask, decl->array_id, decl->size);
for (unsigned j = 0; j < decl->size; ++j) {
}
if (procType == PIPE_SHADER_FRAGMENT) {
- if (program->shader->info.EarlyFragmentTests)
+ if (program->shader->Program->info.fs.early_fragment_tests)
ureg_property(ureg, TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL, 1);
if (proginfo->info.inputs_read & VARYING_BIT_POS) {
}
}
- if (program->shader) {
- unsigned num_ubos = program->shader->NumUniformBlocks;
-
- for (i = 0; i < num_ubos; i++) {
- unsigned size = program->shader->UniformBlocks[i]->UniformBufferSize;
- unsigned num_const_vecs = (size + 15) / 16;
- unsigned first, last;
- assert(num_const_vecs > 0);
- first = 0;
- last = num_const_vecs > 0 ? num_const_vecs - 1 : 0;
- ureg_DECL_constant2D(t->ureg, first, last, i + 1);
- }
+ for (i = 0; i < proginfo->info.num_ubos; i++) {
+ unsigned size = proginfo->sh.UniformBlocks[i]->UniformBufferSize;
+ unsigned num_const_vecs = (size + 15) / 16;
+ unsigned first, last;
+ assert(num_const_vecs > 0);
+ first = 0;
+ last = num_const_vecs > 0 ? num_const_vecs - 1 : 0;
+ ureg_DECL_constant2D(t->ureg, first, last, i + 1);
}
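/* Worked example: a UBO with UniformBufferSize == 40 bytes needs
 * num_const_vecs = (40 + 15) / 16 = 3 vec4 constants, declared as the 2D
 * range [0..2] in constant buffer i + 1 (buffer 0 holds the ordinary
 * uniforms).
 */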
/* Emit immediate values.
if (program->use_shared_memory)
t->shared_memory = ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED);
- for (i = 0; i < program->shader->NumImages; i++) {
+ for (i = 0; i < program->shader->Program->info.num_images; i++) {
if (program->images_used & (1 << i)) {
t->images[i] = ureg_DECL_image(ureg, i,
program->image_targets[i],
prog->Parameters);
/* Remove reads from output registers. */
- lower_output_reads(shader->Stage, shader->ir);
+ if (!pscreen->get_param(pscreen, PIPE_CAP_TGSI_CAN_READ_OUTPUTS))
+ lower_output_reads(shader->Stage, shader->ir);
/* Emit intermediate IR for main(). */
visit_exec_list(shader->ir, v);
return prog;
}
-static void
-set_affected_state_flags(uint64_t *states,
- struct gl_program *prog,
- struct gl_linked_shader *shader,
- uint64_t new_constants,
- uint64_t new_sampler_views,
- uint64_t new_samplers,
- uint64_t new_images,
- uint64_t new_ubos,
- uint64_t new_ssbos,
- uint64_t new_atomics)
-{
- if (prog->Parameters->NumParameters)
- *states |= new_constants;
-
- if (shader->num_samplers)
- *states |= new_sampler_views | new_samplers;
-
- if (shader->NumImages)
- *states |= new_images;
-
- if (shader->NumUniformBlocks)
- *states |= new_ubos;
+/* See if there are unsupported control flow statements. */
+class ir_control_flow_info_visitor : public ir_hierarchical_visitor {
+private:
+ const struct gl_shader_compiler_options *options;
+public:
+ ir_control_flow_info_visitor(const struct gl_shader_compiler_options *options)
+ : options(options),
+ unsupported(false)
+ {
+ }
- if (shader->NumShaderStorageBlocks)
- *states |= new_ssbos;
+ virtual ir_visitor_status visit_enter(ir_function *ir)
+ {
+ /* Other functions are skipped (same as glsl_to_tgsi). */
+ if (strcmp(ir->name, "main") == 0)
+ return visit_continue;
- if (prog->info.num_abos)
- *states |= new_atomics;
-}
+ return visit_continue_with_parent;
+ }
-static struct gl_program *
-get_mesa_program(struct gl_context *ctx,
- struct gl_shader_program *shader_program,
- struct gl_linked_shader *shader)
-{
- struct pipe_screen *pscreen = ctx->st->pipe->screen;
- enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(shader->Stage);
- enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
- pscreen->get_shader_param(pscreen, ptarget, PIPE_SHADER_CAP_PREFERRED_IR);
- struct gl_program *prog = NULL;
-
- if (preferred_ir == PIPE_SHADER_IR_NIR) {
- /* TODO only for GLSL VS/FS for now: */
- switch (shader->Stage) {
- case MESA_SHADER_VERTEX:
- case MESA_SHADER_FRAGMENT:
- prog = st_nir_get_mesa_program(ctx, shader_program, shader);
- default:
- break;
+ virtual ir_visitor_status visit_enter(ir_call *ir)
+ {
+ if (!ir->callee->is_intrinsic()) {
+ unsupported = true; /* it's a function call */
+ return visit_stop;
}
- } else {
- prog = get_mesa_program_tgsi(ctx, shader_program, shader);
+ return visit_continue;
}
- if (prog) {
- uint64_t *states;
-
- /* This determines which states will be updated when the shader is
- * bound.
- */
- switch (shader->Stage) {
- case MESA_SHADER_VERTEX:
- states = &((struct st_vertex_program*)prog)->affected_states;
-
- *states = ST_NEW_VS_STATE |
- ST_NEW_RASTERIZER |
- ST_NEW_VERTEX_ARRAYS;
-
- set_affected_state_flags(states, prog, shader,
- ST_NEW_VS_CONSTANTS,
- ST_NEW_VS_SAMPLER_VIEWS,
- ST_NEW_RENDER_SAMPLERS,
- ST_NEW_VS_IMAGES,
- ST_NEW_VS_UBOS,
- ST_NEW_VS_SSBOS,
- ST_NEW_VS_ATOMICS);
- break;
-
- case MESA_SHADER_TESS_CTRL:
- states = &((struct st_tessctrl_program*)prog)->affected_states;
-
- *states = ST_NEW_TCS_STATE;
-
- set_affected_state_flags(states, prog, shader,
- ST_NEW_TCS_CONSTANTS,
- ST_NEW_TCS_SAMPLER_VIEWS,
- ST_NEW_RENDER_SAMPLERS,
- ST_NEW_TCS_IMAGES,
- ST_NEW_TCS_UBOS,
- ST_NEW_TCS_SSBOS,
- ST_NEW_TCS_ATOMICS);
- break;
-
- case MESA_SHADER_TESS_EVAL:
- states = &((struct st_tesseval_program*)prog)->affected_states;
-
- *states = ST_NEW_TES_STATE |
- ST_NEW_RASTERIZER;
-
- set_affected_state_flags(states, prog, shader,
- ST_NEW_TES_CONSTANTS,
- ST_NEW_TES_SAMPLER_VIEWS,
- ST_NEW_RENDER_SAMPLERS,
- ST_NEW_TES_IMAGES,
- ST_NEW_TES_UBOS,
- ST_NEW_TES_SSBOS,
- ST_NEW_TES_ATOMICS);
- break;
-
- case MESA_SHADER_GEOMETRY:
- states = &((struct st_geometry_program*)prog)->affected_states;
-
- *states = ST_NEW_GS_STATE |
- ST_NEW_RASTERIZER;
-
- set_affected_state_flags(states, prog, shader,
- ST_NEW_GS_CONSTANTS,
- ST_NEW_GS_SAMPLER_VIEWS,
- ST_NEW_RENDER_SAMPLERS,
- ST_NEW_GS_IMAGES,
- ST_NEW_GS_UBOS,
- ST_NEW_GS_SSBOS,
- ST_NEW_GS_ATOMICS);
- break;
-
- case MESA_SHADER_FRAGMENT:
- states = &((struct st_fragment_program*)prog)->affected_states;
-
- /* gl_FragCoord and glDrawPixels always use constants. */
- *states = ST_NEW_FS_STATE |
- ST_NEW_SAMPLE_SHADING |
- ST_NEW_FS_CONSTANTS;
-
- set_affected_state_flags(states, prog, shader,
- ST_NEW_FS_CONSTANTS,
- ST_NEW_FS_SAMPLER_VIEWS,
- ST_NEW_RENDER_SAMPLERS,
- ST_NEW_FS_IMAGES,
- ST_NEW_FS_UBOS,
- ST_NEW_FS_SSBOS,
- ST_NEW_FS_ATOMICS);
- break;
-
- case MESA_SHADER_COMPUTE:
- states = &((struct st_compute_program*)prog)->affected_states;
-
- *states = ST_NEW_CS_STATE;
-
- set_affected_state_flags(states, prog, shader,
- ST_NEW_CS_CONSTANTS,
- ST_NEW_CS_SAMPLER_VIEWS,
- ST_NEW_CS_SAMPLERS,
- ST_NEW_CS_IMAGES,
- ST_NEW_CS_UBOS,
- ST_NEW_CS_SSBOS,
- ST_NEW_CS_ATOMICS);
- break;
-
- default:
- unreachable("unhandled shader stage");
+ virtual ir_visitor_status visit_enter(ir_return *ir)
+ {
+ if (options->EmitNoMainReturn) {
+ unsupported = true;
+ return visit_stop;
}
+ return visit_continue;
}
- return prog;
-}
+ bool unsupported;
+};
+static bool
+has_unsupported_control_flow(exec_list *ir,
+ const struct gl_shader_compiler_options *options)
+{
+ ir_control_flow_info_visitor visitor(options);
+ visit_list_elements(&visitor, ir);
+ return visitor.unsupported;
+}
extern "C" {
GLboolean
st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
{
+ /* Return early if the shader is being loaded from the on-disk cache. */
+ if (st_load_tgsi_from_disk_cache(ctx, prog)) {
+ return GL_TRUE;
+ }
+
struct pipe_screen *pscreen = ctx->st->pipe->screen;
assert(prog->data->LinkStatus);
if (prog->_LinkedShaders[i] == NULL)
continue;
- bool progress;
- exec_list *ir = prog->_LinkedShaders[i]->ir;
- gl_shader_stage stage = prog->_LinkedShaders[i]->Stage;
+ struct gl_linked_shader *shader = prog->_LinkedShaders[i];
+ exec_list *ir = shader->ir;
+ gl_shader_stage stage = shader->Stage;
const struct gl_shader_compiler_options *options =
&ctx->Const.ShaderCompilerOptions[stage];
enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(stage);
*/
if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput ||
options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) {
- lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir,
+ lower_variable_index_to_cond_assign(stage, ir,
options->EmitNoIndirectInput,
options->EmitNoIndirectOutput,
options->EmitNoIndirectTemp,
options->EmitNoIndirectUniform);
}
+ if (!pscreen->get_param(pscreen, PIPE_CAP_INT64_DIVMOD))
+ lower_64bit_integer_instructions(ir, DIV64 | MOD64);
+
if (ctx->Extensions.ARB_shading_language_packing) {
unsigned lower_inst = LOWER_PACK_SNORM_2x16 |
LOWER_UNPACK_SNORM_2x16 |
if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS))
lower_offset_arrays(ir);
do_mat_op_to_vec(ir);
+
+ if (stage == MESA_SHADER_FRAGMENT)
+ lower_blend_equation_advanced(shader);
+
lower_instructions(ir,
MOD_TO_FLOOR |
- DIV_TO_MUL_RCP |
+ FDIV_TO_MUL_RCP |
EXP_TO_EXP2 |
LOG_TO_LOG2 |
LDEXP_TO_ARITH |
lower_discard(ir);
}
- do {
- progress = false;
-
- progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
-
- progress = do_common_optimization(ir, true, true, options,
- ctx->Const.NativeIntegers)
- || progress;
-
- progress = lower_if_to_cond_assign((gl_shader_stage)i, ir,
- options->MaxIfDepth, if_threshold) ||
- progress;
-
- } while (progress);
+ if (ctx->Const.GLSLOptimizeConservatively) {
+ /* Do it once and repeat only if there's unsupported control flow. */
+ do {
+ do_common_optimization(ir, true, true, options,
+ ctx->Const.NativeIntegers);
+ lower_if_to_cond_assign((gl_shader_stage)i, ir,
+ options->MaxIfDepth, if_threshold);
+ } while (has_unsupported_control_flow(ir, options));
+ } else {
+ /* Repeat it until it stops making changes. */
+ bool progress;
+ do {
+ progress = do_common_optimization(ir, true, true, options,
+ ctx->Const.NativeIntegers);
+ progress |= lower_if_to_cond_assign((gl_shader_stage)i, ir,
+ options->MaxIfDepth, if_threshold);
+ } while (progress);
+ }
validate_ir_tree(ir);
}
build_program_resource_list(ctx, prog);
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- struct gl_program *linked_prog;
-
- if (prog->_LinkedShaders[i] == NULL)
+ struct gl_linked_shader *shader = prog->_LinkedShaders[i];
+ if (shader == NULL)
continue;
- linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
+ enum pipe_shader_type ptarget =
+ st_shader_stage_to_ptarget(shader->Stage);
+ enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
+ pscreen->get_shader_param(pscreen, ptarget,
+ PIPE_SHADER_CAP_PREFERRED_IR);
+
+ struct gl_program *linked_prog = NULL;
+ if (preferred_ir == PIPE_SHADER_IR_NIR) {
+ /* TODO only for GLSL VS/FS for now: */
+ switch (shader->Stage) {
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_FRAGMENT:
+ linked_prog = st_nir_get_mesa_program(ctx, prog, shader);
+ default:
+ break;
+ }
+ } else {
+ linked_prog = get_mesa_program_tgsi(ctx, prog, shader);
+ }
if (linked_prog) {
+ st_set_prog_affected_state_flags(linked_prog);
if (!ctx->Driver.ProgramStringNotify(ctx,
_mesa_shader_stage_to_program(i),
linked_prog)) {
- _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
- NULL);
+ _mesa_reference_program(ctx, &shader->Program, NULL);
return GL_FALSE;
}
}
const GLuint outputMapping[],
struct pipe_stream_output_info *so)
{
+ if (!glsl_to_tgsi->shader_program->last_vert_prog)
+ return;
+
struct gl_transform_feedback_info *info =
- &glsl_to_tgsi->shader_program->LinkedTransformFeedback;
+ glsl_to_tgsi->shader_program->last_vert_prog->sh.LinkedTransformFeedback;
st_translate_stream_output_info2(info, outputMapping, so);
}