gallivm/nir: support passing image index into image code.
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_nir.c
index 547161ba7fe3de2ff7c5e7f9fb91f0ff4450eb70..8daa0e09d8d1740e092db4d38428f2d77a431623 100644 (file)
@@ -57,6 +57,10 @@ static LLVMValueRef cast_type(struct lp_build_nir_context *bld_base, LLVMValueRe
       break;
    case nir_type_int:
       switch (bit_size) {
+      case 8:
+         return LLVMBuildBitCast(builder, val, bld_base->int8_bld.vec_type, "");
+      case 16:
+         return LLVMBuildBitCast(builder, val, bld_base->int16_bld.vec_type, "");
       case 32:
          return LLVMBuildBitCast(builder, val, bld_base->int_bld.vec_type, "");
       case 64:
@@ -68,6 +72,10 @@ static LLVMValueRef cast_type(struct lp_build_nir_context *bld_base, LLVMValueRe
       break;
    case nir_type_uint:
       switch (bit_size) {
+      case 8:
+         return LLVMBuildBitCast(builder, val, bld_base->uint8_bld.vec_type, "");
+      case 16:
+         return LLVMBuildBitCast(builder, val, bld_base->uint16_bld.vec_type, "");
       case 32:
          return LLVMBuildBitCast(builder, val, bld_base->uint_bld.vec_type, "");
       case 64:
@@ -85,20 +93,6 @@ static LLVMValueRef cast_type(struct lp_build_nir_context *bld_base, LLVMValueRe
    return NULL;
 }
 
-static struct lp_build_context *get_int_bld(struct lp_build_nir_context *bld_base,
-                                            bool is_unsigned,
-                                            unsigned op_bit_size)
-{
-   if (is_unsigned)
-      if (op_bit_size == 64)
-         return &bld_base->uint64_bld;
-      else
-         return &bld_base->uint_bld;
-   else if (op_bit_size == 64)
-      return &bld_base->int64_bld;
-   else
-      return &bld_base->int_bld;
-}
 
 static struct lp_build_context *get_flt_bld(struct lp_build_nir_context *bld_base,
                                             unsigned op_bit_size)
@@ -122,6 +116,9 @@ static unsigned glsl_sampler_to_pipe(int sampler_dim, bool is_array)
    case GLSL_SAMPLER_DIM_3D:
       pipe_target = PIPE_TEXTURE_3D;
       break;
+   case GLSL_SAMPLER_DIM_MS:
+      pipe_target = is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
+      break;
    case GLSL_SAMPLER_DIM_CUBE:
       pipe_target = is_array ? PIPE_TEXTURE_CUBE_ARRAY : PIPE_TEXTURE_CUBE;
       break;
@@ -169,14 +166,14 @@ static void assign_ssa(struct lp_build_nir_context *bld_base, int idx, LLVMValue
 }
 
 static void assign_ssa_dest(struct lp_build_nir_context *bld_base, const nir_ssa_def *ssa,
-                            LLVMValueRef vals[4])
+                            LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
 {
    assign_ssa(bld_base, ssa->index, ssa->num_components == 1 ? vals[0] : lp_nir_array_build_gather_values(bld_base->base.gallivm->builder, vals, ssa->num_components));
 }
 
 static void assign_reg(struct lp_build_nir_context *bld_base, const nir_reg_dest *reg,
                        unsigned write_mask,
-                       LLVMValueRef vals[4])
+                       LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
 {
    struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, reg->reg);
    LLVMValueRef reg_storage = (LLVMValueRef)entry->data;
@@ -187,7 +184,7 @@ static void assign_reg(struct lp_build_nir_context *bld_base, const nir_reg_dest
    bld_base->store_reg(bld_base, reg_bld, reg, write_mask ? write_mask : 0xf, indir_src, reg_storage, vals);
 }
 
-static void assign_dest(struct lp_build_nir_context *bld_base, const nir_dest *dest, LLVMValueRef vals[4])
+static void assign_dest(struct lp_build_nir_context *bld_base, const nir_dest *dest, LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
 {
    if (dest->is_ssa)
       assign_ssa_dest(bld_base, &dest->ssa, vals);
@@ -195,7 +192,7 @@ static void assign_dest(struct lp_build_nir_context *bld_base, const nir_dest *d
       assign_reg(bld_base, &dest->reg, 0, vals);
 }
 
-static void assign_alu_dest(struct lp_build_nir_context *bld_base, const nir_alu_dest *dest, LLVMValueRef vals[4])
+static void assign_alu_dest(struct lp_build_nir_context *bld_base, const nir_alu_dest *dest, LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
 {
    if (dest->dest.is_ssa)
       assign_ssa_dest(bld_base, &dest->dest.ssa, vals);
@@ -231,7 +228,7 @@ static LLVMValueRef flt_to_bool32(struct lp_build_nir_context *bld_base,
 static LLVMValueRef fcmp32(struct lp_build_nir_context *bld_base,
                            enum pipe_compare_func compare,
                            uint32_t src_bit_size,
-                           LLVMValueRef src[4])
+                           LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
 {
    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
    struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);
@@ -250,12 +247,14 @@ static LLVMValueRef icmp32(struct lp_build_nir_context *bld_base,
                            enum pipe_compare_func compare,
                            bool is_unsigned,
                            uint32_t src_bit_size,
-                           LLVMValueRef src[4])
+                           LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
 {
    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
    struct lp_build_context *i_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
    LLVMValueRef result = lp_build_cmp(i_bld, compare, src[0], src[1]);
-   if (src_bit_size == 64)
+   if (src_bit_size < 32)
+      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
+   else if (src_bit_size == 64)
       result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
    return result;
 }
@@ -282,7 +281,7 @@ static LLVMValueRef get_alu_src(struct lp_build_nir_context *bld_base,
          value = LLVMBuildExtractValue(gallivm->builder, value,
                                        src.swizzle[0], "");
       } else if (src_components == 1 && num_components > 1) {
-         LLVMValueRef values[] = {value, value, value, value};
+         LLVMValueRef values[] = {value, value, value, value, value, value, value, value, value, value, value, value, value, value, value, value};
          value = lp_nir_array_build_gather_values(builder, values, num_components);
       } else {
          LLVMValueRef arr = LLVMGetUndef(LLVMArrayType(LLVMTypeOf(LLVMBuildExtractValue(builder, value, 0, "")), num_components));
@@ -336,8 +335,8 @@ static LLVMValueRef emit_b2i(struct lp_build_nir_context *bld_base,
 }
 
 static LLVMValueRef emit_b32csel(struct lp_build_nir_context *bld_base,
-                               unsigned src_bit_size[4],
-                               LLVMValueRef src[4])
+                               unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
+                               LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
 {
    LLVMValueRef sel = cast_type(bld_base, src[0], nir_type_int, 32);
    LLVMValueRef v = lp_build_compare(bld_base->base.gallivm, bld_base->int_bld.type, PIPE_FUNC_NOTEQUAL, sel, bld_base->int_bld.zero);
@@ -354,8 +353,13 @@ static LLVMValueRef split_64bit(struct lp_build_nir_context *bld_base,
    LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
    int len = bld_base->base.type.length * 2;
    for (unsigned i = 0; i < bld_base->base.type.length; i++) {
+#if UTIL_ARCH_LITTLE_ENDIAN
       shuffles[i] = lp_build_const_int32(gallivm, i * 2);
       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
+#else
+      shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
+      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
+#endif
    }
 
    src = LLVMBuildBitCast(gallivm->builder, src, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), len), "");
@@ -379,14 +383,66 @@ merge_64bit(struct lp_build_nir_context *bld_base,
    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
 
    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
+#if UTIL_ARCH_LITTLE_ENDIAN
       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
+#else
+      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
+      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
+#endif
    }
    return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
 }
 
+/*
+ * Emit an integer (signed or unsigned) vector divide that never traps.
+ *
+ * LLVM's div instruction has undefined behaviour for a zero divisor (and
+ * signed INT_MIN / -1), which on x86 raises SIGFPE.  To avoid that, lanes
+ * whose divisor is zero are detected up front (div_mask is all-ones in
+ * those lanes) and the divisor is forced non-zero before dividing.
+ *
+ * bld_base      - NIR translation context (provides gallivm/builder).
+ * is_unsigned   - selects udiv vs. sdiv semantics and result fixup.
+ * src_bit_size  - element bit width of the operands (selects the int bld).
+ * src, src2     - dividend and divisor vectors.
+ *
+ * Returns: src/src2 per lane, with divide-by-zero lanes forced to
+ * 0xffff... (unsigned, matching d3d10) or 0 (signed).
+ */
+static LLVMValueRef
+do_int_divide(struct lp_build_nir_context *bld_base,
+              bool is_unsigned, unsigned src_bit_size,
+              LLVMValueRef src, LLVMValueRef src2)
+{
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
+   /* comparison must be done unsigned so the mask is all-ones/all-zeros */
+   struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);
+   LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
+                                        mask_bld->zero);
+
+   if (!is_unsigned) {
+      /* INT_MIN (0x80000000) / -1 (0xffffffff) causes sigfpe, seen with blender. */
+      /* Clearing the sign bit of the all-ones mask turns a -1 divisor lane
+       * into a harmless positive value as well.
+       * NOTE(review): 0x7fffffff is a 32-bit constant; for 64-bit sources
+       * the INT64_MIN / -1 lane may not be fully guarded -- confirm. */
+      div_mask = LLVMBuildAnd(builder, div_mask, lp_build_const_int_vec(gallivm, int_bld->type, 0x7fffffff), "");
+   }
+   /* OR-ing the mask into the divisor makes the by-zero lanes non-zero */
+   LLVMValueRef divisor = LLVMBuildOr(builder,
+                                      div_mask,
+                                      src2, "");
+   LLVMValueRef result = lp_build_div(int_bld, src, divisor);
+
+   if (!is_unsigned) {
+      /* signed: force divide-by-zero lanes to 0 */
+      LLVMValueRef not_div_mask = LLVMBuildNot(builder, div_mask, "");
+      return LLVMBuildAnd(builder, not_div_mask, result, "");
+   } else
+      /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10
+       * may as well do same for idiv */
+      return LLVMBuildOr(builder, div_mask, result, "");
+}
+
+/*
+ * Emit an integer vector modulo that never traps on a zero divisor.
+ *
+ * Same trick as do_int_divide(): lanes with src2 == 0 get an all-ones
+ * div_mask, which is OR-ed into the divisor to make it non-zero before
+ * the mod, and then OR-ed into the result so those lanes come out as
+ * all-ones (0xffff...).
+ *
+ * NOTE(review): unlike do_int_divide(), the signed path has no
+ * INT_MIN % -1 guard here -- confirm whether that lane can trap.
+ */
+static LLVMValueRef
+do_int_mod(struct lp_build_nir_context *bld_base,
+           bool is_unsigned, unsigned src_bit_size,
+           LLVMValueRef src, LLVMValueRef src2)
+{
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
+   LLVMValueRef div_mask = lp_build_cmp(int_bld, PIPE_FUNC_EQUAL, src2,
+                                        int_bld->zero);
+   /* force the divisor non-zero in the by-zero lanes */
+   LLVMValueRef divisor = LLVMBuildOr(builder,
+                                      div_mask,
+                                      src2, "");
+   LLVMValueRef result = lp_build_mod(int_bld, src, divisor);
+   /* by-zero lanes return all-ones */
+   return LLVMBuildOr(builder, div_mask, result, "");
+}
+
 static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
-                                  nir_op op, unsigned src_bit_size[4], LLVMValueRef src[4])
+                                  nir_op op, unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS], LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
 {
    struct gallivm_state *gallivm = bld_base->base.gallivm;
    LLVMBuilderRef builder = gallivm->builder;
@@ -459,9 +515,13 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
       result = lp_build_cos(&bld_base->base, src[0]);
       break;
    case nir_op_fddx:
+   case nir_op_fddx_coarse:
+   case nir_op_fddx_fine:
       result = lp_build_ddx(&bld_base->base, src[0]);
       break;
    case nir_op_fddy:
+   case nir_op_fddy_coarse:
+   case nir_op_fddy_fine:
       result = lp_build_ddy(&bld_base->base, src[0]);
       break;
    case nir_op_fdiv:
@@ -558,14 +618,26 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
    case nir_op_i2f64:
       result = lp_build_int_to_float(&bld_base->dbl_bld, src[0]);
       break;
+   case nir_op_i2i8:
+      result = LLVMBuildTrunc(builder, src[0], bld_base->int8_bld.vec_type, "");
+      break;
+   case nir_op_i2i16:
+      if (src_bit_size[0] < 16)
+         result = LLVMBuildSExt(builder, src[0], bld_base->int16_bld.vec_type, "");
+      else
+         result = LLVMBuildTrunc(builder, src[0], bld_base->int16_bld.vec_type, "");
+      break;
    case nir_op_i2i32:
-      result = LLVMBuildTrunc(builder, src[0], bld_base->int_bld.vec_type, "");
+      if (src_bit_size[0] < 32)
+         result = LLVMBuildSExt(builder, src[0], bld_base->int_bld.vec_type, "");
+      else
+         result = LLVMBuildTrunc(builder, src[0], bld_base->int_bld.vec_type, "");
       break;
    case nir_op_i2i64:
       result = LLVMBuildSExt(builder, src[0], bld_base->int64_bld.vec_type, "");
       break;
    case nir_op_iabs:
-      result = lp_build_abs(&bld_base->int_bld, src[0]);
+      result = lp_build_abs(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
       break;
    case nir_op_iadd:
       result = lp_build_add(get_int_bld(bld_base, false, src_bit_size[0]),
@@ -576,8 +648,7 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
                             src[0], src[1]);
       break;
    case nir_op_idiv:
-      result = lp_build_div(&bld_base->int_bld,
-                            src[0], src[1]);
+      result = do_int_divide(bld_base, false, src_bit_size[0], src[0], src[1]);
       break;
    case nir_op_ieq32:
       result = icmp32(bld_base, PIPE_FUNC_EQUAL, false, src_bit_size[0], src);
@@ -589,13 +660,14 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
       result = icmp32(bld_base, PIPE_FUNC_LESS, false, src_bit_size[0], src);
       break;
    case nir_op_imax:
-      result = lp_build_max(&bld_base->int_bld, src[0], src[1]);
+      result = lp_build_max(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
       break;
    case nir_op_imin:
-      result = lp_build_min(&bld_base->int_bld, src[0], src[1]);
+      result = lp_build_min(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
       break;
    case nir_op_imul:
-      result = lp_build_mul(&bld_base->int_bld,
+   case nir_op_imul24:
+      result = lp_build_mul(get_int_bld(bld_base, false, src_bit_size[0]),
                             src[0], src[1]);
       break;
    case nir_op_imul_high: {
@@ -617,16 +689,37 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
       result = lp_build_or(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
       break;
-   case nir_op_ishl:
-      src[1] = lp_build_and(&bld_base->uint_bld, src[1], lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, (src_bit_size[0] - 1)));
-      result = lp_build_shl(&bld_base->int_bld, src[0], src[1]);
+   case nir_op_irem:
+      result = do_int_mod(bld_base, false, src_bit_size[0], src[0], src[1]);
       break;
-   case nir_op_ishr:
-      src[1] = lp_build_and(&bld_base->uint_bld, src[1], lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, (src_bit_size[0] - 1)));
-      result = lp_build_shr(&bld_base->int_bld, src[0], src[1]);
+   case nir_op_ishl: {
+      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
+      struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
+      if (src_bit_size[0] == 64)
+         src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
+      if (src_bit_size[0] < 32)
+         src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
+      src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
+      result = lp_build_shl(int_bld, src[0], src[1]);
+      break;
+   }
+   case nir_op_ishr: {
+      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
+      struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
+      if (src_bit_size[0] == 64)
+         src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
+      if (src_bit_size[0] < 32)
+         src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
+      src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
+      result = lp_build_shr(int_bld, src[0], src[1]);
       break;
+   }
    case nir_op_isign:
-      result = lp_build_sgn(&bld_base->int_bld, src[0]);
+      result = lp_build_sgn(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
+      break;
+   case nir_op_isub:
+      result = lp_build_sub(get_int_bld(bld_base, false, src_bit_size[0]),
+                            src[0], src[1]);
       break;
    case nir_op_ixor:
       result = lp_build_xor(get_int_bld(bld_base, false, src_bit_size[0]),
@@ -653,15 +746,26 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
    case nir_op_u2f64:
       result = LLVMBuildUIToFP(builder, src[0], bld_base->dbl_bld.vec_type, "");
       break;
+   case nir_op_u2u8:
+      result = LLVMBuildTrunc(builder, src[0], bld_base->uint8_bld.vec_type, "");
+      break;
+   case nir_op_u2u16:
+      if (src_bit_size[0] < 16)
+         result = LLVMBuildZExt(builder, src[0], bld_base->uint16_bld.vec_type, "");
+      else
+         result = LLVMBuildTrunc(builder, src[0], bld_base->uint16_bld.vec_type, "");
+      break;
    case nir_op_u2u32:
-      result = LLVMBuildTrunc(builder, src[0], bld_base->uint_bld.vec_type, "");
+      if (src_bit_size[0] < 32)
+         result = LLVMBuildZExt(builder, src[0], bld_base->uint_bld.vec_type, "");
+      else
+         result = LLVMBuildTrunc(builder, src[0], bld_base->uint_bld.vec_type, "");
       break;
    case nir_op_u2u64:
       result = LLVMBuildZExt(builder, src[0], bld_base->uint64_bld.vec_type, "");
       break;
    case nir_op_udiv:
-      result = lp_build_div(&bld_base->uint_bld,
-                            src[0], src[1]);
+      result = do_int_divide(bld_base, true, src_bit_size[0], src[0], src[1]);
       break;
    case nir_op_ufind_msb: {
       struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
@@ -676,13 +780,13 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
       result = icmp32(bld_base, PIPE_FUNC_LESS, true, src_bit_size[0], src);
       break;
    case nir_op_umax:
-      result = lp_build_max(&bld_base->uint_bld, src[0], src[1]);
+      result = lp_build_max(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
       break;
    case nir_op_umin:
-      result = lp_build_min(&bld_base->uint_bld, src[0], src[1]);
+      result = lp_build_min(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
       break;
    case nir_op_umod:
-      result = lp_build_mod(&bld_base->uint_bld, src[0], src[1]);
+      result = do_int_mod(bld_base, true, src_bit_size[0], src[0], src[1]);
       break;
    case nir_op_umul_high: {
       LLVMValueRef hi_bits;
@@ -690,10 +794,16 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
       result = hi_bits;
       break;
    }
-   case nir_op_ushr:
-      src[1] = lp_build_and(&bld_base->uint_bld, src[1], lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, (src_bit_size[0] - 1)));
-      result = lp_build_shr(&bld_base->uint_bld, src[0], src[1]);
+   case nir_op_ushr: {
+      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
+      if (src_bit_size[0] == 64)
+         src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
+      if (src_bit_size[0] < 32)
+         src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
+      src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
+      result = lp_build_shr(uint_bld, src[0], src[1]);
       break;
+   }
    default:
       assert(0);
       break;
@@ -704,14 +814,16 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
 static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr *instr)
 {
    struct gallivm_state *gallivm = bld_base->base.gallivm;
-   LLVMValueRef src[4];
-   unsigned src_bit_size[4];
+   LLVMValueRef src[NIR_MAX_VEC_COMPONENTS];
+   unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS];
    unsigned num_components = nir_dest_num_components(instr->dest.dest);
    unsigned src_components;
    switch (instr->op) {
    case nir_op_vec2:
    case nir_op_vec3:
    case nir_op_vec4:
+   case nir_op_vec8:
+   case nir_op_vec16:
       src_components = 1;
       break;
    case nir_op_pack_half_2x16:
@@ -733,14 +845,14 @@ static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr
       src_bit_size[i] = nir_src_bit_size(instr->src[i].src);
    }
 
-   LLVMValueRef result[4];
-   if (instr->op == nir_op_vec4 || instr->op == nir_op_vec3 || instr->op == nir_op_vec2) {
+   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
+   if (instr->op == nir_op_vec4 || instr->op == nir_op_vec3 || instr->op == nir_op_vec2 || instr->op == nir_op_vec8 || instr->op == nir_op_vec16) {
       for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
          result[i] = cast_type(bld_base, src[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]);
       }
    } else {
       for (unsigned c = 0; c < num_components; c++) {
-         LLVMValueRef src_chan[4];
+         LLVMValueRef src_chan[NIR_MAX_VEC_COMPONENTS];
 
          for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
             if (num_components > 1) {
@@ -760,10 +872,10 @@ static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr
 static void visit_load_const(struct lp_build_nir_context *bld_base,
                              const nir_load_const_instr *instr)
 {
-   LLVMValueRef result[4];
+   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
    struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size);
    for (unsigned i = 0; i < instr->def.num_components; i++)
-      result[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->value[i].u64);
+      result[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->def.bit_size == 32 ? instr->value[i].u32 : instr->value[i].u64);
    assign_ssa_dest(bld_base, &instr->def, result);
 }
 
@@ -840,13 +952,14 @@ out:
 
 static void visit_load_var(struct lp_build_nir_context *bld_base,
                            nir_intrinsic_instr *instr,
-                           LLVMValueRef result[4])
+                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
 {
    nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
    nir_variable *var = nir_deref_instr_get_variable(deref);
    nir_variable_mode mode = deref->mode;
    unsigned const_index;
    LLVMValueRef indir_index;
+   LLVMValueRef indir_vertex_index = NULL;
    unsigned vertex_index = 0;
    unsigned nc = nir_dest_num_components(instr->dest);
    unsigned bit_size = nir_dest_bit_size(instr->dest);
@@ -855,12 +968,19 @@ static void visit_load_var(struct lp_build_nir_context *bld_base,
          var->data.mode == nir_var_shader_in;
       bool gs_in = bld_base->shader->info.stage == MESA_SHADER_GEOMETRY &&
          var->data.mode == nir_var_shader_in;
+      bool tcs_in = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
+         var->data.mode == nir_var_shader_in;
+      bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
+         var->data.mode == nir_var_shader_out && !var->data.patch;
+      bool tes_in = bld_base->shader->info.stage == MESA_SHADER_TESS_EVAL &&
+         var->data.mode == nir_var_shader_in && !var->data.patch;
+
       mode = var->data.mode;
 
-      get_deref_offset(bld_base, deref, vs_in, gs_in ? &vertex_index : NULL, NULL,
+      get_deref_offset(bld_base, deref, vs_in, gs_in ? &vertex_index : NULL, (tcs_in || tcs_out || tes_in) ? &indir_vertex_index : NULL,
                        &const_index, &indir_index);
    }
-   bld_base->load_var(bld_base, mode, nc, bit_size, var, vertex_index, const_index, indir_index, result);
+   bld_base->load_var(bld_base, mode, nc, bit_size, var, vertex_index, indir_vertex_index, const_index, indir_index, result);
 }
 
 static void
@@ -874,16 +994,19 @@ visit_store_var(struct lp_build_nir_context *bld_base,
    unsigned bit_size = nir_src_bit_size(instr->src[1]);
    LLVMValueRef src = get_src(bld_base, instr->src[1]);
    unsigned const_index = 0;
-   LLVMValueRef indir_index;
-   if (var)
-      get_deref_offset(bld_base, deref, false, NULL, NULL,
+   LLVMValueRef indir_index, indir_vertex_index = NULL;
+   if (var) {
+      bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
+         var->data.mode == nir_var_shader_out && !var->data.patch;
+      get_deref_offset(bld_base, deref, false, NULL, tcs_out ? &indir_vertex_index : NULL,
                        &const_index, &indir_index);
-   bld_base->store_var(bld_base, mode, bit_size, instr->num_components, writemask, const_index, var, src);
+   }
+   bld_base->store_var(bld_base, mode, instr->num_components, bit_size, var, writemask, indir_vertex_index, const_index, indir_index, src);
 }
 
 static void visit_load_ubo(struct lp_build_nir_context *bld_base,
                            nir_intrinsic_instr *instr,
-                           LLVMValueRef result[4])
+                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
 {
    struct gallivm_state *gallivm = bld_base->base.gallivm;
    LLVMBuilderRef builder = gallivm->builder;
@@ -899,7 +1022,7 @@ static void visit_load_ubo(struct lp_build_nir_context *bld_base,
 
 static void visit_load_ssbo(struct lp_build_nir_context *bld_base,
                            nir_intrinsic_instr *instr,
-                           LLVMValueRef result[4])
+                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
 {
    LLVMValueRef idx = get_src(bld_base, instr->src[0]);
    LLVMValueRef offset = get_src(bld_base, instr->src[1]);
@@ -921,7 +1044,7 @@ static void visit_store_ssbo(struct lp_build_nir_context *bld_base,
 
 static void visit_get_buffer_size(struct lp_build_nir_context *bld_base,
                                   nir_intrinsic_instr *instr,
-                                  LLVMValueRef result[4])
+                                  LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
 {
    LLVMValueRef idx = get_src(bld_base, instr->src[0]);
    result[0] = bld_base->get_buffer_size(bld_base, idx);
@@ -929,7 +1052,7 @@ static void visit_get_buffer_size(struct lp_build_nir_context *bld_base,
 
 static void visit_ssbo_atomic(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr,
-                              LLVMValueRef result[4])
+                              LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
 {
    LLVMValueRef idx = get_src(bld_base, instr->src[0]);
    LLVMValueRef offset = get_src(bld_base, instr->src[1]);
@@ -944,7 +1067,7 @@ static void visit_ssbo_atomic(struct lp_build_nir_context *bld_base,
 
 static void visit_load_image(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr,
-                             LLVMValueRef result[4])
+                             LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
 {
    struct gallivm_state *gallivm = bld_base->base.gallivm;
    LLVMBuilderRef builder = gallivm->builder;
@@ -954,6 +1077,10 @@ static void visit_load_image(struct lp_build_nir_context *bld_base,
    LLVMValueRef coords[5];
    struct lp_img_params params;
    const struct glsl_type *type = glsl_without_array(var->type);
+   unsigned const_index;
+   LLVMValueRef indir_index;
+   get_deref_offset(bld_base, deref, false, NULL, NULL,
+                    &const_index, &indir_index);
 
    memset(&params, 0, sizeof(params));
    params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
@@ -965,7 +1092,10 @@ static void visit_load_image(struct lp_build_nir_context *bld_base,
    params.coords = coords;
    params.outdata = result;
    params.img_op = LP_IMG_LOAD;
-   params.image_index = var->data.binding;
+   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
+      params.ms_index = get_src(bld_base, instr->src[2]);
+   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
+   params.image_index_offset = indir_index;
    bld_base->image_op(bld_base, &params);
 }
 
@@ -981,6 +1111,10 @@ static void visit_store_image(struct lp_build_nir_context *bld_base,
    LLVMValueRef coords[5];
    struct lp_img_params params;
    const struct glsl_type *type = glsl_without_array(var->type);
+   unsigned const_index;
+   LLVMValueRef indir_index;
+   get_deref_offset(bld_base, deref, false, NULL, NULL,
+                    &const_index, &indir_index);
 
    memset(&params, 0, sizeof(params));
    params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
@@ -994,8 +1128,11 @@ static void visit_store_image(struct lp_build_nir_context *bld_base,
       params.indata[i] = LLVMBuildExtractValue(builder, in_val, i, "");
       params.indata[i] = LLVMBuildBitCast(builder, params.indata[i], bld_base->base.vec_type, "");
    }
+   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
+      params.ms_index = get_src(bld_base, instr->src[2]);
    params.img_op = LP_IMG_STORE;
-   params.image_index = var->data.binding;
+   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
+   params.image_index_offset = indir_index;
 
    if (params.target == PIPE_TEXTURE_1D_ARRAY)
       coords[2] = coords[1];
@@ -1004,7 +1141,7 @@ static void visit_store_image(struct lp_build_nir_context *bld_base,
 
 static void visit_atomic_image(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
-                               LLVMValueRef result[4])
+                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
 {
    struct gallivm_state *gallivm = bld_base->base.gallivm;
    LLVMBuilderRef builder = gallivm->builder;
@@ -1015,6 +1152,10 @@ static void visit_atomic_image(struct lp_build_nir_context *bld_base,
    LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
    LLVMValueRef coords[5];
    const struct glsl_type *type = glsl_without_array(var->type);
+   unsigned const_index;
+   LLVMValueRef indir_index;
+   get_deref_offset(bld_base, deref, false, NULL, NULL,
+                    &const_index, &indir_index);
 
    memset(&params, 0, sizeof(params));
 
@@ -1056,6 +1197,8 @@ static void visit_atomic_image(struct lp_build_nir_context *bld_base,
    if (params.target == PIPE_TEXTURE_1D_ARRAY)
       coords[2] = coords[1];
    params.coords = coords;
+   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
+      params.ms_index = get_src(bld_base, instr->src[2]);
    if (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) {
       LLVMValueRef cas_val = get_src(bld_base, instr->src[4]);
       params.indata[0] = in_val;
@@ -1065,7 +1208,8 @@ static void visit_atomic_image(struct lp_build_nir_context *bld_base,
 
    params.outdata = result;
    params.img_op = (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
-   params.image_index = var->data.binding;
+   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
+   params.image_index_offset = indir_index;
 
    bld_base->image_op(bld_base, &params);
 }
@@ -1073,21 +1217,49 @@ static void visit_atomic_image(struct lp_build_nir_context *bld_base,
 
 static void visit_image_size(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr,
-                             LLVMValueRef result[4])
+                             LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
+{
+   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+   struct lp_sampler_size_query_params params = { 0 };
+   unsigned const_index;
+   LLVMValueRef indir_index;
+   const struct glsl_type *type = glsl_without_array(var->type);
+   get_deref_offset(bld_base, deref, false, NULL, NULL,
+                    &const_index, &indir_index);
+   params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
+   params.texture_unit_offset = indir_index;
+   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
+   params.sizes_out = result;
+
+   bld_base->image_size(bld_base, &params);
+}
+
+static void visit_image_samples(struct lp_build_nir_context *bld_base,
+                                nir_intrinsic_instr *instr,
+                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
 {
    nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
    nir_variable *var = nir_deref_instr_get_variable(deref);
    struct lp_sampler_size_query_params params = { 0 };
-   params.texture_unit = var->data.binding;
-   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(var->type), glsl_sampler_type_is_array(var->type));
+   unsigned const_index;
+   LLVMValueRef indir_index;
+   const struct glsl_type *type = glsl_without_array(var->type);
+   get_deref_offset(bld_base, deref, false, NULL, NULL,
+                    &const_index, &indir_index);
+
+   params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
+   params.texture_unit_offset = indir_index;
+   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
    params.sizes_out = result;
+   params.samples_only = true;
 
    bld_base->image_size(bld_base, &params);
 }
 
 static void visit_shared_load(struct lp_build_nir_context *bld_base,
                                 nir_intrinsic_instr *instr,
-                                LLVMValueRef result[4])
+                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
 {
    LLVMValueRef offset = get_src(bld_base, instr->src[0]);
    bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
@@ -1107,7 +1279,7 @@ static void visit_shared_store(struct lp_build_nir_context *bld_base,
 
 static void visit_shared_atomic(struct lp_build_nir_context *bld_base,
                                 nir_intrinsic_instr *instr,
-                                LLVMValueRef result[4])
+                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
 {
    LLVMValueRef offset = get_src(bld_base, instr->src[0]);
    LLVMValueRef val = get_src(bld_base, instr->src[1]);
@@ -1135,10 +1307,85 @@ static void visit_discard(struct lp_build_nir_context *bld_base,
    bld_base->discard(bld_base, cond);
 }
 
+static void visit_load_kernel_input(struct lp_build_nir_context *bld_base,
+                                    nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
+{
+   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
+
+   bool offset_is_uniform = nir_src_is_dynamically_uniform(instr->src[0]);
+   bld_base->load_kernel_arg(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
+                             nir_src_bit_size(instr->src[0]),
+                             offset_is_uniform, offset, result);
+}
+
+static void visit_load_global(struct lp_build_nir_context *bld_base,
+                              nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
+{
+   LLVMValueRef addr = get_src(bld_base, instr->src[0]);
+   bld_base->load_global(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
+                         nir_src_bit_size(instr->src[0]),
+                         addr, result);
+}
+
+static void visit_store_global(struct lp_build_nir_context *bld_base,
+                               nir_intrinsic_instr *instr)
+{
+   LLVMValueRef val = get_src(bld_base, instr->src[0]);
+   int nc = nir_src_num_components(instr->src[0]);
+   int bitsize = nir_src_bit_size(instr->src[0]);
+   LLVMValueRef addr = get_src(bld_base, instr->src[1]);
+   int addr_bitsize = nir_src_bit_size(instr->src[1]);
+   int writemask = instr->const_index[0];
+   bld_base->store_global(bld_base, writemask, nc, bitsize, addr_bitsize, addr, val);
+}
+
+static void visit_global_atomic(struct lp_build_nir_context *bld_base,
+                                nir_intrinsic_instr *instr,
+                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
+{
+   LLVMValueRef addr = get_src(bld_base, instr->src[0]);
+   LLVMValueRef val = get_src(bld_base, instr->src[1]);
+   LLVMValueRef val2 = NULL;
+   int addr_bitsize = nir_src_bit_size(instr->src[0]);
+   if (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap)
+      val2 = get_src(bld_base, instr->src[2]);
+
+   bld_base->atomic_global(bld_base, instr->intrinsic, addr_bitsize, addr, val, val2, &result[0]);
+}
+
+static void visit_interp(struct lp_build_nir_context *bld_base,
+                         nir_intrinsic_instr *instr,
+                         LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
+{
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+   unsigned num_components = nir_dest_num_components(instr->dest);
+   nir_variable *var = nir_deref_instr_get_variable(deref);
+   unsigned const_index;
+   LLVMValueRef indir_index;
+   LLVMValueRef offsets[2] = { NULL, NULL };
+   get_deref_offset(bld_base, deref, false, NULL, NULL,
+                    &const_index, &indir_index);
+   bool centroid = instr->intrinsic == nir_intrinsic_interp_deref_at_centroid;
+   bool sample = false;
+   if (instr->intrinsic == nir_intrinsic_interp_deref_at_offset) {
+      for (unsigned i = 0; i < 2; i++) {
+         offsets[i] = LLVMBuildExtractValue(builder, get_src(bld_base, instr->src[1]), i, "");
+         offsets[i] = cast_type(bld_base, offsets[i], nir_type_float, 32);
+      }
+   } else if (instr->intrinsic == nir_intrinsic_interp_deref_at_sample) {
+      offsets[0] = get_src(bld_base, instr->src[1]);
+      offsets[0] = cast_type(bld_base, offsets[0], nir_type_int, 32);
+      sample = true;
+   }
+   bld_base->interp_at(bld_base, num_components, var, centroid, sample, const_index, indir_index, offsets, result);
+}
+
 static void visit_intrinsic(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr)
 {
-   LLVMValueRef result[4] = {0};
+   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS] = {0};
    switch (instr->intrinsic) {
    case nir_intrinsic_load_deref:
       visit_load_var(bld_base, instr, result);
@@ -1162,13 +1409,27 @@ static void visit_intrinsic(struct lp_build_nir_context *bld_base,
    case nir_intrinsic_load_primitive_id:
    case nir_intrinsic_load_instance_id:
    case nir_intrinsic_load_base_instance:
+   case nir_intrinsic_load_base_vertex:
    case nir_intrinsic_load_work_group_id:
    case nir_intrinsic_load_local_invocation_id:
    case nir_intrinsic_load_num_work_groups:
    case nir_intrinsic_load_invocation_id:
    case nir_intrinsic_load_front_face:
+   case nir_intrinsic_load_draw_id:
+   case nir_intrinsic_load_local_group_size:
+   case nir_intrinsic_load_work_dim:
+   case nir_intrinsic_load_tess_coord:
+   case nir_intrinsic_load_tess_level_outer:
+   case nir_intrinsic_load_tess_level_inner:
+   case nir_intrinsic_load_patch_vertices_in:
+   case nir_intrinsic_load_sample_id:
+   case nir_intrinsic_load_sample_pos:
+   case nir_intrinsic_load_sample_mask_in:
       bld_base->sysval_intrin(bld_base, instr, result);
       break;
+   case nir_intrinsic_load_helper_invocation:
+      bld_base->helper_invocation(bld_base, &result[0]);
+      break;
    case nir_intrinsic_discard_if:
    case nir_intrinsic_discard:
       visit_discard(bld_base, instr);
@@ -1212,6 +1473,9 @@ static void visit_intrinsic(struct lp_build_nir_context *bld_base,
    case nir_intrinsic_image_deref_size:
       visit_image_size(bld_base, instr, result);
       break;
+   case nir_intrinsic_image_deref_samples:
+      visit_image_samples(bld_base, instr, result);
+      break;
    case nir_intrinsic_load_shared:
       visit_shared_load(bld_base, instr, result);
       break;
@@ -1230,10 +1494,46 @@ static void visit_intrinsic(struct lp_build_nir_context *bld_base,
    case nir_intrinsic_shared_atomic_comp_swap:
       visit_shared_atomic(bld_base, instr, result);
       break;
-   case nir_intrinsic_barrier:
+   case nir_intrinsic_control_barrier:
       visit_barrier(bld_base);
       break;
+   case nir_intrinsic_group_memory_barrier:
    case nir_intrinsic_memory_barrier:
+   case nir_intrinsic_memory_barrier_shared:
+   case nir_intrinsic_memory_barrier_buffer:
+   case nir_intrinsic_memory_barrier_image:
+   case nir_intrinsic_memory_barrier_tcs_patch:
+      break;
+   case nir_intrinsic_load_kernel_input:
+      visit_load_kernel_input(bld_base, instr, result);
+      break;
+   case nir_intrinsic_load_global:
+      visit_load_global(bld_base, instr, result);
+      break;
+   case nir_intrinsic_store_global:
+      visit_store_global(bld_base, instr);
+      break;
+   case nir_intrinsic_global_atomic_add:
+   case nir_intrinsic_global_atomic_imin:
+   case nir_intrinsic_global_atomic_umin:
+   case nir_intrinsic_global_atomic_imax:
+   case nir_intrinsic_global_atomic_umax:
+   case nir_intrinsic_global_atomic_and:
+   case nir_intrinsic_global_atomic_or:
+   case nir_intrinsic_global_atomic_xor:
+   case nir_intrinsic_global_atomic_exchange:
+   case nir_intrinsic_global_atomic_comp_swap:
+      visit_global_atomic(bld_base, instr, result);
+      break;
+   case nir_intrinsic_vote_all:
+   case nir_intrinsic_vote_any:
+   case nir_intrinsic_vote_ieq:
+      bld_base->vote(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, 32), instr, result);
+      break;
+   case nir_intrinsic_interp_deref_at_offset:
+   case nir_intrinsic_interp_deref_at_centroid:
+   case nir_intrinsic_interp_deref_at_sample:
+      visit_interp(bld_base, instr, result);
       break;
    default:
       assert(0);
@@ -1246,15 +1546,18 @@ static void visit_intrinsic(struct lp_build_nir_context *bld_base,
 
 static void visit_txs(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
 {
-   struct lp_sampler_size_query_params params;
-   LLVMValueRef sizes_out[4];
+   struct lp_sampler_size_query_params params = { 0 };
+   LLVMValueRef sizes_out[NIR_MAX_VEC_COMPONENTS];
    LLVMValueRef explicit_lod = NULL;
-
+   LLVMValueRef texture_unit_offset = NULL;
    for (unsigned i = 0; i < instr->num_srcs; i++) {
       switch (instr->src[i].src_type) {
       case nir_tex_src_lod:
          explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
          break;
+      case nir_tex_src_texture_offset:
+         texture_unit_offset = get_src(bld_base, instr->src[i].src);
+         break;
       default:
          break;
       }
@@ -1265,6 +1568,8 @@ static void visit_txs(struct lp_build_nir_context *bld_base, nir_tex_instr *inst
    params.explicit_lod = explicit_lod;
    params.is_sviewinfo = TRUE;
    params.sizes_out = sizes_out;
+   params.samples_only = (instr->op == nir_texop_texture_samples);
+   params.texture_unit_offset = texture_unit_offset;
 
    if (instr->op == nir_texop_query_levels)
       params.explicit_lod = bld_base->uint_bld.zero;
@@ -1296,28 +1601,30 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef coords[5];
    LLVMValueRef offsets[3] = { NULL };
-   LLVMValueRef explicit_lod = NULL, projector = NULL;
+   LLVMValueRef explicit_lod = NULL, projector = NULL, ms_index = NULL;
    struct lp_sampler_params params;
    struct lp_derivatives derivs;
    unsigned sample_key = 0;
    nir_deref_instr *texture_deref_instr = NULL;
    nir_deref_instr *sampler_deref_instr = NULL;
-   LLVMValueRef texel[4];
+   LLVMValueRef texture_unit_offset = NULL;
+   LLVMValueRef texel[NIR_MAX_VEC_COMPONENTS];
    unsigned lod_src = 0;
    LLVMValueRef coord_undef = LLVMGetUndef(bld_base->base.int_vec_type);
 
    memset(&params, 0, sizeof(params));
    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
 
-   if (instr->op == nir_texop_txs || instr->op == nir_texop_query_levels) {
+   if (instr->op == nir_texop_txs || instr->op == nir_texop_query_levels || instr->op == nir_texop_texture_samples) {
       visit_txs(bld_base, instr);
       return;
    }
    if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
       sample_key |= LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
-   else if (instr->op == nir_texop_tg4)
+   else if (instr->op == nir_texop_tg4) {
       sample_key |= LP_SAMPLER_OP_GATHER << LP_SAMPLER_OP_TYPE_SHIFT;
-   else if (instr->op == nir_texop_lod)
+      sample_key |= (instr->component << LP_SAMPLER_GATHER_COMP_SHIFT);
+   } else if (instr->op == nir_texop_lod)
       sample_key |= LP_SAMPLER_OP_LODQ << LP_SAMPLER_OP_TYPE_SHIFT;
    for (unsigned i = 0; i < instr->num_srcs; i++) {
       switch (instr->src[i].src_type) {
@@ -1399,15 +1706,25 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst
          LLVMValueRef offset_val = get_src(bld_base, instr->src[i].src);
          sample_key |= LP_SAMPLER_OFFSETS;
          if (offset_cnt == 1)
-            offsets[0] = offset_val;
+            offsets[0] = cast_type(bld_base, offset_val, nir_type_int, 32);
          else {
-            for (unsigned chan = 0; chan < offset_cnt; ++chan)
+            for (unsigned chan = 0; chan < offset_cnt; ++chan) {
                offsets[chan] = LLVMBuildExtractValue(builder, offset_val,
                                                      chan, "");
+               offsets[chan] = cast_type(bld_base, offsets[chan], nir_type_int, 32);
+            }
          }
          break;
       }
       case nir_tex_src_ms_index:
+         sample_key |= LP_SAMPLER_FETCH_MS;
+         ms_index = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
+         break;
+
+      case nir_tex_src_texture_offset:
+         texture_unit_offset = get_src(bld_base, instr->src[i].src);
+         break;
+      case nir_tex_src_sampler_offset:
          break;
       default:
          assert(0);
@@ -1465,10 +1782,12 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst
    params.sample_key = sample_key;
    params.offsets = offsets;
    params.texture_index = base_index;
+   params.texture_index_offset = texture_unit_offset;
    params.sampler_index = base_index;
    params.coords = coords;
    params.texel = texel;
    params.lod = explicit_lod;
+   params.ms_index = ms_index;
    bld_base->tex(bld_base, &params);
    assign_dest(bld_base, &instr->dest, texel);
 }
@@ -1477,9 +1796,10 @@ static void visit_ssa_undef(struct lp_build_nir_context *bld_base,
                             const nir_ssa_undef_instr *instr)
 {
    unsigned num_components = instr->def.num_components;
-   LLVMValueRef undef[4];
+   LLVMValueRef undef[NIR_MAX_VEC_COMPONENTS];
+   struct lp_build_context *undef_bld = get_int_bld(bld_base, true, instr->def.bit_size);
    for (unsigned i = 0; i < num_components; i++)
-      undef[i] = LLVMGetUndef(bld_base->base.vec_type);
+      undef[i] = LLVMGetUndef(undef_bld->vec_type);
    assign_ssa_dest(bld_base, &instr->def, undef);
 }
 
@@ -1641,7 +1961,7 @@ bool lp_build_nir_llvm(
    nir_convert_from_ssa(nir, true);
    nir_lower_locals_to_regs(nir);
    nir_remove_dead_derefs(nir);
-   nir_remove_dead_variables(nir, nir_var_function_temp);
+   nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
 
    nir_foreach_variable(variable, &nir->outputs)
       handle_shader_output_decl(bld_base, nir, variable);
@@ -1677,6 +1997,10 @@ void lp_build_opt_nir(struct nir_shader *nir)
       progress = false;
       NIR_PASS_V(nir, nir_opt_constant_folding);
       NIR_PASS_V(nir, nir_opt_algebraic);
+      NIR_PASS_V(nir, nir_lower_pack);
+
+      nir_lower_tex_options options = { .lower_tex_without_implicit_lod = true };
+      NIR_PASS_V(nir, nir_lower_tex, &options);
    } while (progress);
    nir_lower_bool_to_int32(nir);
 }