radeonsi: change the bit-packing of LS out/TCS in data
[mesa.git] / src / gallium / drivers / radeonsi / si_shader.c
index 21efd9a0218f8284da1509f15d1e88c54248f0e5..5c17c640a3c9462500594b3f040eea3f6c376a9b 100644 (file)
@@ -72,6 +72,8 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
 static void si_dump_shader_key(unsigned shader, struct si_shader_key *key,
                               FILE *f);
 
+static unsigned llvm_get_type_size(LLVMTypeRef type);
+
 static void si_build_vs_prolog_function(struct si_shader_context *ctx,
                                        union si_shader_part_key *key);
 static void si_build_vs_epilog_function(struct si_shader_context *ctx,
@@ -225,9 +227,9 @@ static LLVMValueRef
 get_tcs_in_patch_stride(struct si_shader_context *ctx)
 {
        if (ctx->type == PIPE_SHADER_VERTEX)
-               return unpack_param(ctx, SI_PARAM_LS_OUT_LAYOUT, 0, 13);
+               return unpack_param(ctx, SI_PARAM_LS_OUT_LAYOUT, 8, 13);
        else if (ctx->type == PIPE_SHADER_TESS_CTRL)
-               return unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 0, 13);
+               return unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 8, 13);
        else {
                assert(0);
                return NULL;
@@ -303,7 +305,7 @@ static LLVMValueRef get_instance_index_for_fetch(
        struct si_shader_context *ctx,
        unsigned param_start_instance, unsigned divisor)
 {
-       struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
 
        LLVMValueRef result = LLVMGetParam(ctx->main_fn,
                                           ctx->param_instance_id);
@@ -338,8 +340,7 @@ static void declare_input_vs(
        const struct tgsi_full_declaration *decl,
        LLVMValueRef out[4])
 {
-       struct lp_build_context *base = &ctx->bld_base.base;
-       struct gallivm_state *gallivm = base->gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
 
        unsigned chan;
        unsigned fix_fetch;
@@ -545,7 +546,7 @@ static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base,
        struct si_shader_context *ctx = si_shader_context(bld_base);
 
        if (swizzle > 0)
-               return bld_base->uint_bld.zero;
+               return ctx->i32_0;
 
        switch (ctx->type) {
        case PIPE_SHADER_VERTEX:
@@ -562,7 +563,7 @@ static LLVMValueRef get_primitive_id(struct lp_build_tgsi_context *bld_base,
                                    SI_PARAM_PRIMITIVE_ID);
        default:
                assert(0);
-               return bld_base->uint_bld.zero;
+               return ctx->i32_0;
        }
 }
 
@@ -574,7 +575,7 @@ static LLVMValueRef get_indirect_index(struct si_shader_context *ctx,
                                       const struct tgsi_ind_register *ind,
                                       int rel_index)
 {
-       struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef result;
 
        result = ctx->addrs[ind->Index][ind->Swizzle];
@@ -614,7 +615,7 @@ static LLVMValueRef get_dw_address(struct si_shader_context *ctx,
                                   LLVMValueRef vertex_dw_stride,
                                   LLVMValueRef base_addr)
 {
-       struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        struct tgsi_shader_info *info = &ctx->shader->selector->info;
        ubyte *name, *index, *array_first;
        int first, param;
@@ -713,7 +714,7 @@ static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx,
                                                LLVMValueRef vertex_index,
                                                LLVMValueRef param_index)
 {
-       struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices;
        LLVMValueRef param_stride, constant16;
 
@@ -757,7 +758,7 @@ static LLVMValueRef get_tcs_tes_buffer_address_from_reg(
                                        const struct tgsi_full_dst_register *dst,
                                        const struct tgsi_full_src_register *src)
 {
-       struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        struct tgsi_shader_info *info = &ctx->shader->selector->info;
        ubyte *name, *index, *array_first;
        struct tgsi_full_src_register reg;
@@ -821,7 +822,7 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
                                 LLVMValueRef base, bool readonly_memory)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef value, value2;
        LLVMTypeRef llvm_type = tgsi2llvmtype(bld_base, type);
        LLVMTypeRef vec_type = LLVMVectorType(llvm_type, 4);
@@ -863,7 +864,7 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
                             LLVMValueRef dw_addr)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef value;
 
        if (swizzle == ~0) {
@@ -872,7 +873,7 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
                for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++)
                        values[chan] = lds_load(bld_base, type, chan, dw_addr);
 
-               return lp_build_gather_values(bld_base->base.gallivm, values,
+               return lp_build_gather_values(gallivm, values,
                                              TGSI_NUM_CHANNELS);
        }
 
@@ -904,7 +905,7 @@ static void lds_store(struct lp_build_tgsi_context *bld_base,
                      LLVMValueRef value)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
 
        dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
                            LLVMConstInt(ctx->i32, swizzle, 0));
@@ -922,7 +923,7 @@ static LLVMValueRef fetch_input_tcs(
        struct si_shader_context *ctx = si_shader_context(bld_base);
        LLVMValueRef dw_addr, stride;
 
-       stride = unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 13, 8);
+       stride = unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 24, 8);
        dw_addr = get_tcs_in_current_patch_offset(ctx);
        dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
 
@@ -974,7 +975,7 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
                             LLVMValueRef dst[4])
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        const struct tgsi_full_dst_register *reg = &inst->Dst[0];
        const struct tgsi_shader_info *sh_info = &ctx->shader->selector->info;
        unsigned chan_index;
@@ -1045,7 +1046,7 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
        }
 
        if (inst->Dst[0].Register.WriteMask == 0xF && !is_tess_factor) {
-               LLVMValueRef value = lp_build_gather_values(bld_base->base.gallivm,
+               LLVMValueRef value = lp_build_gather_values(gallivm,
                                                            values, 4);
                ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr,
                                            base, 0, 1, 0, true, false);
@@ -1058,11 +1059,10 @@ static LLVMValueRef fetch_input_gs(
        enum tgsi_opcode_type type,
        unsigned swizzle)
 {
-       struct lp_build_context *base = &bld_base->base;
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct si_shader *shader = ctx->shader;
        struct lp_build_context *uint = &ctx->bld_base.uint_bld;
-       struct gallivm_state *gallivm = base->gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef vtx_offset, soffset;
        unsigned vtx_offset_param;
        struct tgsi_shader_info *info = &shader->selector->info;
@@ -1083,7 +1083,7 @@ static LLVMValueRef fetch_input_gs(
                for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
                        values[chan] = fetch_input_gs(bld_base, reg, type, chan);
                }
-               return lp_build_gather_values(bld_base->base.gallivm, values,
+               return lp_build_gather_values(gallivm, values,
                                              TGSI_NUM_CHANNELS);
        }
 
@@ -1103,14 +1103,14 @@ static LLVMValueRef fetch_input_gs(
        param = si_shader_io_get_unique_index(semantic_name, semantic_index);
        soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle) * 256, 0);
 
-       value = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1, uint->zero,
+       value = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1, ctx->i32_0,
                                     vtx_offset, soffset, 0, 1, 0, true);
        if (tgsi_type_is_64bit(type)) {
                LLVMValueRef value2;
                soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle + 1) * 256, 0);
 
                value2 = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1,
-                                             uint->zero, vtx_offset, soffset,
+                                             ctx->i32_0, vtx_offset, soffset,
                                              0, 1, 0, true);
                return si_llvm_emit_fetch_64bit(bld_base, type,
                                                value, value2);
@@ -1174,10 +1174,7 @@ static void interp_fs_input(struct si_shader_context *ctx,
                            LLVMValueRef face,
                            LLVMValueRef result[4])
 {
-       struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
-       struct lp_build_context *base = &bld_base->base;
-       struct lp_build_context *uint = &bld_base->uint_bld;
-       struct gallivm_state *gallivm = base->gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef attr_number;
        LLVMValueRef i, j;
 
@@ -1205,9 +1202,9 @@ static void interp_fs_input(struct si_shader_context *ctx,
                                                LLVMVectorType(ctx->f32, 2), "");
 
                i = LLVMBuildExtractElement(gallivm->builder, interp_param,
-                                               uint->zero, "");
+                                               ctx->i32_0, "");
                j = LLVMBuildExtractElement(gallivm->builder, interp_param,
-                                               uint->one, "");
+                                               ctx->i32_1, "");
        }
 
        if (semantic_name == TGSI_SEMANTIC_COLOR &&
@@ -1225,7 +1222,7 @@ static void interp_fs_input(struct si_shader_context *ctx,
                back_attr_number = LLVMConstInt(ctx->i32, back_attr_offset, 0);
 
                is_face_positive = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
-                                                face, uint->zero, "");
+                                                face, ctx->i32_0, "");
 
                for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
                        LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, 0);
@@ -1255,10 +1252,10 @@ static void interp_fs_input(struct si_shader_context *ctx,
                }
        } else if (semantic_name == TGSI_SEMANTIC_FOG) {
                if (interp) {
-                       result[0] = ac_build_fs_interp(&ctx->ac, uint->zero,
+                       result[0] = ac_build_fs_interp(&ctx->ac, ctx->i32_0,
                                                       attr_number, prim_mask, i, j);
                } else {
-                       result[0] = ac_build_fs_interp_mov(&ctx->ac, uint->zero,
+                       result[0] = ac_build_fs_interp_mov(&ctx->ac, ctx->i32_0,
                                                           LLVMConstInt(ctx->i32, 2, 0), /* P0 */
                                                           attr_number, prim_mask);
                }
@@ -1381,6 +1378,8 @@ static void declare_system_value(struct si_shader_context *ctx,
        struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef value = 0;
 
+       assert(index < RADEON_LLVM_MAX_SYSTEM_VALUES);
+
        switch (decl->Semantic.Name) {
        case TGSI_SEMANTIC_INSTANCEID:
                value = LLVMGetParam(ctx->main_fn,
@@ -1396,8 +1395,9 @@ static void declare_system_value(struct si_shader_context *ctx,
                break;
 
        case TGSI_SEMANTIC_VERTEXID_NOBASE:
-               value = LLVMGetParam(ctx->main_fn,
-                                    ctx->param_vertex_id);
+               /* Unused. Clarify the meaning in indexed vs. non-indexed
+                * draws if this is ever used again. */
+               assert(false);
                break;
 
        case TGSI_SEMANTIC_BASEVERTEX:
@@ -1589,6 +1589,46 @@ static void declare_system_value(struct si_shader_context *ctx,
                }
                break;
 
+       case TGSI_SEMANTIC_SUBGROUP_SIZE:
+               value = LLVMConstInt(ctx->i32, 64, 0);
+               break;
+
+       case TGSI_SEMANTIC_SUBGROUP_INVOCATION:
+               value = ac_get_thread_id(&ctx->ac);
+               break;
+
+       case TGSI_SEMANTIC_SUBGROUP_EQ_MASK:
+       {
+               LLVMValueRef id = ac_get_thread_id(&ctx->ac);
+               id = LLVMBuildZExt(gallivm->builder, id, ctx->i64, "");
+               value = LLVMBuildShl(gallivm->builder, LLVMConstInt(ctx->i64, 1, 0), id, "");
+               value = LLVMBuildBitCast(gallivm->builder, value, ctx->v2i32, "");
+               break;
+       }
+
+       case TGSI_SEMANTIC_SUBGROUP_GE_MASK:
+       case TGSI_SEMANTIC_SUBGROUP_GT_MASK:
+       case TGSI_SEMANTIC_SUBGROUP_LE_MASK:
+       case TGSI_SEMANTIC_SUBGROUP_LT_MASK:
+       {
+               LLVMValueRef id = ac_get_thread_id(&ctx->ac);
+               if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_GT_MASK ||
+                   decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK) {
+                       /* All bits set except LSB */
+                       value = LLVMConstInt(ctx->i64, -2, 0);
+               } else {
+                       /* All bits set */
+                       value = LLVMConstInt(ctx->i64, -1, 0);
+               }
+               id = LLVMBuildZExt(gallivm->builder, id, ctx->i64, "");
+               value = LLVMBuildShl(gallivm->builder, value, id, "");
+               if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK ||
+                   decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LT_MASK)
+                       value = LLVMBuildNot(gallivm->builder, value, "");
+               value = LLVMBuildBitCast(gallivm->builder, value, ctx->v2i32, "");
+               break;
+       }
+
        default:
                assert(!"unknown system value");
                return;
@@ -1648,7 +1688,7 @@ static LLVMValueRef fetch_constant(
                for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
                        values[chan] = fetch_constant(bld_base, reg, type, chan);
 
-               return lp_build_gather_values(bld_base->base.gallivm, values, 4);
+               return lp_build_gather_values(&ctx->gallivm, values, 4);
        }
 
        buf = reg->Register.Dimension ? reg->Dimension.Index : 0;
@@ -1721,7 +1761,7 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct lp_build_context *base = &bld_base->base;
-       LLVMBuilderRef builder = base->gallivm->builder;
+       LLVMBuilderRef builder = ctx->gallivm.builder;
        LLVMValueRef val[4];
        unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
        unsigned chan;
@@ -1791,7 +1831,7 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
 
                        packed = ac_build_cvt_pkrtz_f16(&ctx->ac, pack_args);
                        args->out[chan] =
-                               LLVMBuildBitCast(base->gallivm->builder,
+                               LLVMBuildBitCast(ctx->gallivm.builder,
                                                 packed, ctx->f32, "");
                }
                break;
@@ -1930,7 +1970,7 @@ static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *
                                                  unsigned samplemask_param)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef coverage;
 
        /* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */
@@ -2308,15 +2348,15 @@ handle_semantic:
                if (shader->selector->info.writes_edgeflag) {
                        /* The output is a float, but the hw expects an integer
                         * with the first bit containing the edge flag. */
-                       edgeflag_value = LLVMBuildFPToUI(base->gallivm->builder,
+                       edgeflag_value = LLVMBuildFPToUI(ctx->gallivm.builder,
                                                         edgeflag_value,
                                                         ctx->i32, "");
                        edgeflag_value = lp_build_min(&bld_base->int_bld,
                                                      edgeflag_value,
-                                                     bld_base->int_bld.one);
+                                                     ctx->i32_1);
 
                        /* The LLVM intrinsic expects a float. */
-                       pos_args[1].out[1] = LLVMBuildBitCast(base->gallivm->builder,
+                       pos_args[1].out[1] = LLVMBuildBitCast(ctx->gallivm.builder,
                                                          edgeflag_value,
                                                          ctx->f32, "");
                }
@@ -2355,7 +2395,7 @@ handle_semantic:
 static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef invocation_id, rw_buffers, buffer, buffer_offset;
        LLVMValueRef lds_vertex_stride, lds_vertex_offset, lds_base;
        uint64_t inputs;
@@ -2368,7 +2408,7 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
 
        buffer_offset = LLVMGetParam(ctx->main_fn, ctx->param_oc_lds);
 
-       lds_vertex_stride = unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 13, 8);
+       lds_vertex_stride = unpack_param(ctx, SI_PARAM_TCS_IN_LAYOUT, 24, 8);
        lds_vertex_offset = LLVMBuildMul(gallivm->builder, invocation_id,
                                         lds_vertex_stride, "");
        lds_base = get_tcs_in_current_patch_offset(ctx);
@@ -2401,7 +2441,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
                                  LLVMValueRef tcs_out_current_patch_data_offset)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        struct si_shader *shader = ctx->shader;
        unsigned tess_inner_index, tess_outer_index;
        LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
@@ -2419,7 +2459,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
         */
        lp_build_if(&if_ctx, gallivm,
                    LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
-                                 invocation_id, bld_base->uint_bld.zero, ""));
+                                 invocation_id, ctx->i32_0, ""));
 
        /* Determine the layout of one tess factor element in the buffer. */
        switch (shader->key.part.tcs.epilog.prim_mode) {
@@ -2500,7 +2540,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
 
        lp_build_if(&inner_if_ctx, gallivm,
                    LLVMBuildICmp(gallivm->builder, LLVMIntEQ,
-                                 rel_patch_id, bld_base->uint_bld.zero, ""));
+                                 rel_patch_id, ctx->i32_0, ""));
 
        /* Store the dynamic HS control word. */
        ac_build_buffer_store_dword(&ctx->ac, buffer,
@@ -2571,7 +2611,7 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
        tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
 
        /* Return epilog parameters from this function. */
-       LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+       LLVMBuilderRef builder = ctx->gallivm.builder;
        LLVMValueRef ret = ctx->return_value;
        LLVMValueRef rw_buffers, rw0, rw1, tf_soffset;
        unsigned vgpr;
@@ -2582,9 +2622,9 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
        rw_buffers = LLVMBuildPtrToInt(builder, rw_buffers, ctx->i64, "");
        rw_buffers = LLVMBuildBitCast(builder, rw_buffers, ctx->v2i32, "");
        rw0 = LLVMBuildExtractElement(builder, rw_buffers,
-                                     bld_base->uint_bld.zero, "");
+                                     ctx->i32_0, "");
        rw1 = LLVMBuildExtractElement(builder, rw_buffers,
-                                     bld_base->uint_bld.one, "");
+                                     ctx->i32_1, "");
        ret = LLVMBuildInsertValue(builder, ret, rw0, 0, "");
        ret = LLVMBuildInsertValue(builder, ret, rw1, 1, "");
 
@@ -2618,12 +2658,12 @@ static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct si_shader *shader = ctx->shader;
        struct tgsi_shader_info *info = &shader->selector->info;
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        unsigned i, chan;
        LLVMValueRef vertex_id = LLVMGetParam(ctx->main_fn,
                                              ctx->param_rel_auto_id);
        LLVMValueRef vertex_dw_stride =
-               unpack_param(ctx, SI_PARAM_LS_OUT_LAYOUT, 13, 8);
+               unpack_param(ctx, SI_PARAM_LS_OUT_LAYOUT, 24, 8);
        LLVMValueRef base_dw_addr = LLVMBuildMul(gallivm->builder, vertex_id,
                                                 vertex_dw_stride, "");
 
@@ -2647,7 +2687,7 @@ static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
 static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        struct si_shader *es = ctx->shader;
        struct tgsi_shader_info *info = &es->selector->info;
        LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
@@ -2690,7 +2730,7 @@ static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
 static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        struct tgsi_shader_info *info = &ctx->shader->selector->info;
        struct si_shader_output_values *outputs = NULL;
        int i,j;
@@ -2823,7 +2863,7 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
                if (stencil) {
                        /* Stencil should be in X[23:16]. */
                        stencil = bitcast(bld_base, TGSI_TYPE_UNSIGNED, stencil);
-                       stencil = LLVMBuildShl(base->gallivm->builder, stencil,
+                       stencil = LLVMBuildShl(ctx->gallivm.builder, stencil,
                                               LLVMConstInt(ctx->i32, 16, 0), "");
                        args.out[0] = bitcast(bld_base, TGSI_TYPE_FLOAT, stencil);
                        mask |= 0x3;
@@ -2971,9 +3011,8 @@ static void si_llvm_return_fs_outputs(struct lp_build_tgsi_context *bld_base)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct si_shader *shader = ctx->shader;
-       struct lp_build_context *base = &bld_base->base;
        struct tgsi_shader_info *info = &shader->selector->info;
-       LLVMBuilderRef builder = base->gallivm->builder;
+       LLVMBuilderRef builder = ctx->gallivm.builder;
        unsigned i, j, first_vgpr, vgpr;
 
        LLVMValueRef color[8][4] = {};
@@ -3057,7 +3096,7 @@ static LLVMValueRef get_buffer_size(
        LLVMValueRef descriptor)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        LLVMValueRef size =
                LLVMBuildExtractElement(builder, descriptor,
@@ -3089,16 +3128,43 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
 /* Prevent optimizations (at least of memory accesses) across the current
  * point in the program by emitting empty inline assembly that is marked as
  * having side effects.
+ *
+ * Optionally, a value can be passed through the inline assembly to prevent
+ * LLVM from hoisting calls to ReadNone functions.
  */
-#if 0 /* unused currently */
-static void emit_optimization_barrier(struct si_shader_context *ctx)
+static void emit_optimization_barrier(struct si_shader_context *ctx,
+                                     LLVMValueRef *pvgpr)
 {
+       static int counter = 0;
+
        LLVMBuilderRef builder = ctx->gallivm.builder;
-       LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
-       LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, "", "", true, false);
-       LLVMBuildCall(builder, inlineasm, NULL, 0, "");
+       char code[16];
+
+       snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter));
+
+       if (!pvgpr) {
+               LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
+               LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false);
+               LLVMBuildCall(builder, inlineasm, NULL, 0, "");
+       } else {
+               LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false);
+               LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "=v,0", true, false);
+               LLVMValueRef vgpr = *pvgpr;
+               LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr);
+               unsigned vgpr_size = llvm_get_type_size(vgpr_type);
+               LLVMValueRef vgpr0;
+
+               assert(vgpr_size % 4 == 0);
+
+               vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), "");
+               vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, "");
+               vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, "");
+               vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, "");
+               vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, "");
+
+               *pvgpr = vgpr;
+       }
 }
-#endif
 
 /* Combine these with & instead of |. */
 #define NOOP_WAITCNT 0xf7f
@@ -3304,7 +3370,7 @@ static LLVMValueRef image_fetch_coords(
                unsigned src)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        unsigned target = inst->Memory.Texture;
        unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
@@ -3314,18 +3380,18 @@ static LLVMValueRef image_fetch_coords(
 
        for (chan = 0; chan < num_coords; ++chan) {
                tmp = lp_build_emit_fetch(bld_base, inst, src, chan);
-               tmp = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
+               tmp = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
                coords[chan] = tmp;
        }
 
        /* 1D textures are allocated and used as 2D on GFX9. */
        if (ctx->screen->b.chip_class >= GFX9) {
                if (target == TGSI_TEXTURE_1D) {
-                       coords[1] = bld_base->uint_bld.zero;
+                       coords[1] = ctx->i32_0;
                        num_coords++;
                } else if (target == TGSI_TEXTURE_1D_ARRAY) {
                        coords[2] = coords[1];
-                       coords[1] = bld_base->uint_bld.zero;
+                       coords[1] = ctx->i32_0;
                }
        }
 
@@ -3417,12 +3483,12 @@ static void load_fetch_args(
                struct lp_build_emit_data * emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        const struct tgsi_full_instruction * inst = emit_data->inst;
        unsigned target = inst->Memory.Texture;
        LLVMValueRef rsrc;
 
-       emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
+       emit_data->dst_type = ctx->v4f32;
 
        if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
                LLVMBuilderRef builder = gallivm->builder;
@@ -3432,9 +3498,9 @@ static void load_fetch_args(
                rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0]);
 
                tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
-               offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
+               offset = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
 
-               buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
+               buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
                                   offset, false, false);
        } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
                LLVMValueRef coords;
@@ -3444,7 +3510,7 @@ static void load_fetch_args(
 
                if (target == TGSI_TEXTURE_BUFFER) {
                        buffer_append_args(ctx, emit_data, rsrc, coords,
-                                          bld_base->uint_bld.zero, false, false);
+                                          ctx->i32_0, false, false);
                } else {
                        emit_data->args[0] = coords;
                        emit_data->args[1] = rsrc;
@@ -3530,18 +3596,17 @@ static void load_emit_memory(
                struct lp_build_emit_data *emit_data)
 {
        const struct tgsi_full_instruction *inst = emit_data->inst;
-       struct lp_build_context *base = &ctx->bld_base.base;
        struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        unsigned writemask = inst->Dst[0].Register.WriteMask;
        LLVMValueRef channels[4], ptr, derived_ptr, index;
        int chan;
 
-       ptr = get_memory_ptr(ctx, inst, base->elem_type, 1);
+       ptr = get_memory_ptr(ctx, inst, ctx->f32, 1);
 
        for (chan = 0; chan < 4; ++chan) {
                if (!(writemask & (1 << chan))) {
-                       channels[chan] = LLVMGetUndef(base->elem_type);
+                       channels[chan] = LLVMGetUndef(ctx->f32);
                        continue;
                }
 
@@ -3620,7 +3685,7 @@ static void load_emit(
                struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        const struct tgsi_full_instruction * inst = emit_data->inst;
        const struct tgsi_shader_info *info = &ctx->shader->selector->info;
@@ -3673,7 +3738,7 @@ static void store_fetch_args(
                struct lp_build_emit_data * emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        const struct tgsi_full_instruction * inst = emit_data->inst;
        struct tgsi_full_src_register memory;
@@ -3700,9 +3765,9 @@ static void store_fetch_args(
                rsrc = shader_buffer_fetch_rsrc(ctx, &memory);
 
                tmp = lp_build_emit_fetch(bld_base, inst, 0, 0);
-               offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
+               offset = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
 
-               buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
+               buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
                                   offset, false, false);
        } else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE) {
                unsigned target = inst->Memory.Texture;
@@ -3721,7 +3786,7 @@ static void store_fetch_args(
                if (target == TGSI_TEXTURE_BUFFER) {
                        image_fetch_rsrc(bld_base, &memory, true, target, &rsrc);
                        buffer_append_args(ctx, emit_data, rsrc, coords,
-                                          bld_base->uint_bld.zero, false, force_glc);
+                                          ctx->i32_0, false, force_glc);
                } else {
                        emit_data->args[1] = coords;
                        image_fetch_rsrc(bld_base, &memory, true, target,
@@ -3742,7 +3807,6 @@ static void store_emit_buffer(
        const struct tgsi_full_instruction *inst = emit_data->inst;
        struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
-       struct lp_build_context *uint_bld = &ctx->bld_base.uint_bld;
        LLVMValueRef base_data = emit_data->args[0];
        LLVMValueRef base_offset = emit_data->args[3];
        unsigned writemask = inst->Dst[0].Register.WriteMask;
@@ -3774,13 +3838,13 @@ static void store_emit_buffer(
                                LLVMConstInt(ctx->i32, start, 0), "");
                        data = LLVMBuildInsertElement(
                                builder, LLVMGetUndef(v2f32), tmp,
-                               uint_bld->zero, "");
+                               ctx->i32_0, "");
 
                        tmp = LLVMBuildExtractElement(
                                builder, base_data,
                                LLVMConstInt(ctx->i32, start + 1, 0), "");
                        data = LLVMBuildInsertElement(
-                               builder, data, tmp, uint_bld->one, "");
+                               builder, data, tmp, ctx->i32_1, "");
 
                        intrinsic_name = "llvm.amdgcn.buffer.store.v2f32";
                } else {
@@ -3814,13 +3878,12 @@ static void store_emit_memory(
 {
        const struct tgsi_full_instruction *inst = emit_data->inst;
        struct gallivm_state *gallivm = &ctx->gallivm;
-       struct lp_build_context *base = &ctx->bld_base.base;
        LLVMBuilderRef builder = gallivm->builder;
        unsigned writemask = inst->Dst[0].Register.WriteMask;
        LLVMValueRef ptr, derived_ptr, data, index;
        int chan;
 
-       ptr = get_memory_ptr(ctx, inst, base->elem_type, 0);
+       ptr = get_memory_ptr(ctx, inst, ctx->f32, 0);
 
        for (chan = 0; chan < 4; ++chan) {
                if (!(writemask & (1 << chan))) {
@@ -3839,7 +3902,7 @@ static void store_emit(
                struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        const struct tgsi_full_instruction * inst = emit_data->inst;
        const struct tgsi_shader_info *info = &ctx->shader->selector->info;
@@ -3892,21 +3955,21 @@ static void atomic_fetch_args(
                struct lp_build_emit_data * emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        const struct tgsi_full_instruction * inst = emit_data->inst;
        LLVMValueRef data1, data2;
        LLVMValueRef rsrc;
        LLVMValueRef tmp;
 
-       emit_data->dst_type = bld_base->base.elem_type;
+       emit_data->dst_type = ctx->f32;
 
        tmp = lp_build_emit_fetch(bld_base, inst, 2, 0);
-       data1 = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
+       data1 = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
 
        if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
                tmp = lp_build_emit_fetch(bld_base, inst, 3, 0);
-               data2 = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
+               data2 = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
        }
 
        /* llvm.amdgcn.image/buffer.atomic.cmpswap reflect the hardware order
@@ -3922,9 +3985,9 @@ static void atomic_fetch_args(
                rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0]);
 
                tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
-               offset = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.elem_type, "");
+               offset = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
 
-               buffer_append_args(ctx, emit_data, rsrc, bld_base->uint_bld.zero,
+               buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
                                   offset, true, false);
        } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) {
                unsigned target = inst->Memory.Texture;
@@ -3935,7 +3998,7 @@ static void atomic_fetch_args(
 
                if (target == TGSI_TEXTURE_BUFFER) {
                        buffer_append_args(ctx, emit_data, rsrc, coords,
-                                          bld_base->uint_bld.zero, true, false);
+                                          ctx->i32_0, true, false);
                } else {
                        emit_data->args[emit_data->arg_count++] = coords;
                        emit_data->args[emit_data->arg_count++] = rsrc;
@@ -4020,7 +4083,7 @@ static void atomic_emit(
                struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        const struct tgsi_full_instruction * inst = emit_data->inst;
        char intrinsic_name[40];
@@ -4051,10 +4114,10 @@ static void atomic_emit(
        }
 
        tmp = lp_build_intrinsic(
-               builder, intrinsic_name, bld_base->uint_bld.elem_type,
+               builder, intrinsic_name, ctx->i32,
                emit_data->args, emit_data->arg_count, 0);
        emit_data->output[emit_data->chan] =
-               LLVMBuildBitCast(builder, tmp, bld_base->base.elem_type, "");
+               LLVMBuildBitCast(builder, tmp, ctx->f32, "");
 }
 
 static void set_tex_fetch_args(struct si_shader_context *ctx,
@@ -4146,7 +4209,7 @@ static void resq_fetch_args(
                image_fetch_rsrc(bld_base, reg, false, inst->Memory.Texture,
                                 &res_ptr);
                set_tex_fetch_args(ctx, emit_data, image_target,
-                                  res_ptr, NULL, &bld_base->uint_bld.zero, 1,
+                                  res_ptr, NULL, &ctx->i32_0, 1,
                                   0xf);
        }
 }
@@ -4157,7 +4220,7 @@ static void resq_emit(
                struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        const struct tgsi_full_instruction *inst = emit_data->inst;
        LLVMValueRef out;
@@ -4358,7 +4421,7 @@ static void tex_fetch_args(
        struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        const struct tgsi_full_instruction *inst = emit_data->inst;
        unsigned opcode = inst->Instruction.Opcode;
        unsigned target = inst->Texture.Texture;
@@ -4379,7 +4442,7 @@ static void tex_fetch_args(
                emit_data->dst_type = ctx->v4f32;
                emit_data->args[0] = LLVMBuildBitCast(gallivm->builder, res_ptr,
                                                      ctx->v16i8, "");
-               emit_data->args[1] = bld_base->uint_bld.zero;
+               emit_data->args[1] = ctx->i32_0;
                emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
                emit_data->arg_count = 3;
                return;
@@ -4545,7 +4608,7 @@ static void tex_fetch_args(
 
                /* Use 0.5, so that we don't sample the border color. */
                if (opcode == TGSI_OPCODE_TXF)
-                       filler = bld_base->uint_bld.zero;
+                       filler = ctx->i32_0;
                else
                        filler = LLVMConstReal(ctx->f32, 0.5);
 
@@ -4592,7 +4655,6 @@ static void tex_fetch_args(
         */
        if (target == TGSI_TEXTURE_2D_MSAA ||
            target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
-               struct lp_build_context *uint_bld = &bld_base->uint_bld;
                struct lp_build_emit_data txf_emit_data = *emit_data;
                LLVMValueRef txf_address[4];
                /* We only need .xy for non-arrays, and .xyz for arrays. */
@@ -4619,7 +4681,7 @@ static void tex_fetch_args(
                LLVMValueRef fmask =
                        LLVMBuildExtractElement(gallivm->builder,
                                                txf_emit_data.output[0],
-                                               uint_bld->zero, "");
+                                               ctx->i32_0, "");
 
                unsigned sample_chan = txf_count; /* the sample index is last */
 
@@ -4641,11 +4703,11 @@ static void tex_fetch_args(
 
                LLVMValueRef fmask_word1 =
                        LLVMBuildExtractElement(gallivm->builder, fmask_desc,
-                                               uint_bld->one, "");
+                                               ctx->i32_1, "");
 
                LLVMValueRef word1_is_nonzero =
                        LLVMBuildICmp(gallivm->builder, LLVMIntNE,
-                                     fmask_word1, uint_bld->zero, "");
+                                     fmask_word1, ctx->i32_0, "");
 
                /* Replace the MSAA sample index. */
                address[sample_chan] =
@@ -4755,8 +4817,7 @@ static void si_lower_gather4_integer(struct si_shader_context *ctx,
                txq_emit_data.inst = &txq_inst;
                txq_emit_data.dst_type = ctx->v4i32;
                set_tex_fetch_args(ctx, &txq_emit_data, target,
-                                  args->resource, NULL,
-                                  &ctx->bld_base.uint_bld.zero,
+                                  args->resource, NULL, &ctx->i32_0,
                                   1, 0xf);
                txq_emit(NULL, &ctx->bld_base, &txq_emit_data);
 
@@ -4886,7 +4947,7 @@ static void si_llvm_emit_txqs(
        struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        LLVMValueRef res, samples;
        LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
@@ -4915,7 +4976,7 @@ static void si_llvm_emit_ddxy(
        struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        unsigned opcode = emit_data->info->opcode;
        LLVMValueRef val;
        int idx;
@@ -4947,7 +5008,7 @@ static LLVMValueRef si_llvm_emit_ddxy_interp(
        LLVMValueRef interp_ij)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef result[4], a;
        unsigned i;
 
@@ -4966,7 +5027,7 @@ static void interp_fetch_args(
        struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        const struct tgsi_full_instruction *inst = emit_data->inst;
 
        if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
@@ -5011,8 +5072,7 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct si_shader *shader = ctx->shader;
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
-       struct lp_build_context *uint = &bld_base->uint_bld;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef interp_param;
        const struct tgsi_full_instruction *inst = emit_data->inst;
        int input_index = inst->Src[0].Register.Index;
@@ -5077,7 +5137,7 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
 
                        ij_out[i] = LLVMBuildFAdd(gallivm->builder, temp2, temp1, "");
                }
-               interp_param = lp_build_gather_values(bld_base->base.gallivm, ij_out, 2);
+               interp_param = lp_build_gather_values(gallivm, ij_out, 2);
        }
 
        for (chan = 0; chan < 4; chan++) {
@@ -5091,9 +5151,9 @@ static void build_interp_intrinsic(const struct lp_build_tgsi_action *action,
                        interp_param = LLVMBuildBitCast(gallivm->builder,
                                interp_param, LLVMVectorType(ctx->f32, 2), "");
                        LLVMValueRef i = LLVMBuildExtractElement(
-                               gallivm->builder, interp_param, uint->zero, "");
+                               gallivm->builder, interp_param, ctx->i32_0, "");
                        LLVMValueRef j = LLVMBuildExtractElement(
-                               gallivm->builder, interp_param, uint->one, "");
+                               gallivm->builder, interp_param, ctx->i32_1, "");
                        emit_data->output[chan] = ac_build_fs_interp(&ctx->ac,
                                llvm_chan, attr_number, params,
                                i, j);
@@ -5115,8 +5175,13 @@ static LLVMValueRef si_emit_ballot(struct si_shader_context *ctx,
                LLVMConstInt(ctx->i32, LLVMIntNE, 0)
        };
 
-       if (LLVMTypeOf(value) != ctx->i32)
-               args[0] = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, "");
+       /* We currently have no other way to prevent LLVM from lifting the icmp
+        * calls to a dominating basic block.
+        */
+       emit_optimization_barrier(ctx, &args[0]);
+
+       if (LLVMTypeOf(args[0]) != ctx->i32)
+               args[0] = LLVMBuildBitCast(gallivm->builder, args[0], ctx->i32, "");
 
        return lp_build_intrinsic(gallivm->builder,
                                  "llvm.amdgcn.icmp.i32",
@@ -5183,6 +5248,61 @@ static void vote_eq_emit(
                LLVMBuildSExt(gallivm->builder, tmp, ctx->i32, "");
 }
 
+static void ballot_emit(
+       const struct lp_build_tgsi_action *action,
+       struct lp_build_tgsi_context *bld_base,
+       struct lp_build_emit_data *emit_data)
+{ /* Emit callback for TGSI BALLOT: ballot of source 0's X channel, written as two i32 outputs. */
+       struct si_shader_context *ctx = si_shader_context(bld_base);
+       LLVMBuilderRef builder = ctx->gallivm.builder;
+       LLVMValueRef tmp;
+
+       tmp = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
+       tmp = si_emit_ballot(ctx, tmp);
+       tmp = LLVMBuildBitCast(builder, tmp, ctx->v2i32, ""); /* view the ballot result as two i32 words */
+
+       emit_data->output[0] = LLVMBuildExtractElement(builder, tmp, ctx->i32_0, ""); /* element 0 */
+       emit_data->output[1] = LLVMBuildExtractElement(builder, tmp, ctx->i32_1, ""); /* element 1 */
+}
+
+static void read_invoc_fetch_args(
+       struct lp_build_tgsi_context *bld_base,
+       struct lp_build_emit_data *emit_data)
+{ /* fetch_args for TGSI READ_INVOC: args[0] = value for this channel, args[1] = lane. */
+       emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
+                                                0, emit_data->src_chan);
+
+       /* Always read the source invocation (= lane) from the X channel. */
+       emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
+                                                1, TGSI_CHAN_X);
+       emit_data->arg_count = 2;
+}
+
+static void read_lane_emit(
+       const struct lp_build_tgsi_action *action,
+       struct lp_build_tgsi_context *bld_base,
+       struct lp_build_emit_data *emit_data)
+{ /* Shared emit callback for the read-lane opcodes; calls the intrinsic named by action->intr_name. */
+       struct si_shader_context *ctx = si_shader_context(bld_base);
+       LLVMBuilderRef builder = ctx->gallivm.builder;
+
+       /* We currently have no other way to prevent LLVM from lifting the
+        * readlane/readfirstlane calls to a dominating basic block.
+        */
+       emit_optimization_barrier(ctx, &emit_data->args[0]);
+
+       for (unsigned i = 0; i < emit_data->arg_count; ++i) { /* every operand is passed as i32 */
+               emit_data->args[i] = LLVMBuildBitCast(builder, emit_data->args[i],
+                                                     ctx->i32, "");
+       }
+
+       emit_data->output[emit_data->chan] =
+               ac_build_intrinsic(&ctx->ac, action->intr_name,
+                                  ctx->i32, emit_data->args, emit_data->arg_count,
+                                  AC_FUNC_ATTR_READNONE |
+                                  AC_FUNC_ATTR_CONVERGENT);
+}
+
 static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
                                       struct lp_build_emit_data *emit_data)
 {
@@ -5208,7 +5328,7 @@ static void si_llvm_emit_vertex(
        struct lp_build_context *uint = &bld_base->uint_bld;
        struct si_shader *shader = ctx->shader;
        struct tgsi_shader_info *info = &shader->selector->info;
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        struct lp_build_if_state if_state;
        LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
                                            SI_PARAM_GS2VS_OFFSET);
@@ -5308,7 +5428,7 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
                                 struct lp_build_emit_data *emit_data)
 {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
 
        /* SI only (thanks to a hw bug workaround):
         * The real barrier instruction isn’t needed, because an entire patch
@@ -5440,11 +5560,9 @@ static unsigned llvm_get_type_size(LLVMTypeRef type)
 static void declare_tess_lds(struct si_shader_context *ctx)
 { /* Sets ctx->lds to a pointer named "tess_lds", built by casting integer 0 to an i32-array pointer. */
        struct gallivm_state *gallivm = &ctx->gallivm;
-       struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
-       struct lp_build_context *uint = &bld_base->uint_bld;
 
        unsigned lds_size = ctx->screen->b.chip_class >= CIK ? 65536 : 32768; /* presumably bytes: divided by 4 for the i32 array below */
-       ctx->lds = LLVMBuildIntToPtr(gallivm->builder, uint->zero,
+       ctx->lds = LLVMBuildIntToPtr(gallivm->builder, ctx->i32_0,
                LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), LOCAL_ADDR_SPACE),
                "tess_lds");
 }
@@ -5469,7 +5587,7 @@ static unsigned si_get_max_workgroup_size(struct si_shader *shader)
 static void create_function(struct si_shader_context *ctx)
 {
        struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        struct si_shader *shader = ctx->shader;
        LLVMTypeRef params[SI_NUM_PARAMS + SI_MAX_ATTRIBS], v3i32;
        LLVMTypeRef returns[16+32*4];
@@ -5734,7 +5852,7 @@ static void create_function(struct si_shader_context *ctx)
  */
 static void preload_ring_buffers(struct si_shader_context *ctx)
 {
-       struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
 
        LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn,
@@ -5761,7 +5879,6 @@ static void preload_ring_buffers(struct si_shader_context *ctx)
                        ac_build_indexed_load_const(&ctx->ac, buf_ptr, offset);
        } else if (ctx->type == PIPE_SHADER_GEOMETRY) {
                const struct si_shader_selector *sel = ctx->shader->selector;
-               struct lp_build_context *uint = &ctx->bld_base.uint_bld;
                LLVMValueRef offset = LLVMConstInt(ctx->i32, SI_RING_GSVS, 0);
                LLVMValueRef base_ring;
 
@@ -5796,20 +5913,20 @@ static void preload_ring_buffers(struct si_shader_context *ctx)
                        num_records = 64;
 
                        ring = LLVMBuildBitCast(builder, base_ring, v2i64, "");
-                       tmp = LLVMBuildExtractElement(builder, ring, uint->zero, "");
+                       tmp = LLVMBuildExtractElement(builder, ring, ctx->i32_0, "");
                        tmp = LLVMBuildAdd(builder, tmp,
                                           LLVMConstInt(ctx->i64,
                                                        stream_offset, 0), "");
                        stream_offset += stride * 64;
 
-                       ring = LLVMBuildInsertElement(builder, ring, tmp, uint->zero, "");
+                       ring = LLVMBuildInsertElement(builder, ring, tmp, ctx->i32_0, "");
                        ring = LLVMBuildBitCast(builder, ring, ctx->v4i32, "");
-                       tmp = LLVMBuildExtractElement(builder, ring, uint->one, "");
+                       tmp = LLVMBuildExtractElement(builder, ring, ctx->i32_1, "");
                        tmp = LLVMBuildOr(builder, tmp,
                                LLVMConstInt(ctx->i32,
                                             S_008F04_STRIDE(stride) |
                                             S_008F04_SWIZZLE_ENABLE(1), 0), "");
-                       ring = LLVMBuildInsertElement(builder, ring, tmp, uint->one, "");
+                       ring = LLVMBuildInsertElement(builder, ring, tmp, ctx->i32_1, "");
                        ring = LLVMBuildInsertElement(builder, ring,
                                        LLVMConstInt(ctx->i32, num_records, 0),
                                        LLVMConstInt(ctx->i32, 2, 0), "");
@@ -5837,8 +5954,7 @@ static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
                                         LLVMValueRef param_rw_buffers,
                                         unsigned param_pos_fixed_pt)
 {
-       struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
-       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        LLVMValueRef slot, desc, offset, row, bit, address[2];
 
@@ -6378,7 +6494,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
        if (gs_selector->so.num_outputs)
                stream_id = unpack_param(&ctx, ctx.param_streamout_config, 24, 2);
        else
-               stream_id = uint->zero;
+               stream_id = ctx.i32_0;
 
        /* Fill in output information. */
        for (i = 0; i < gsinfo->num_outputs; ++i) {
@@ -6428,7 +6544,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
                                outputs[i].values[chan] =
                                        ac_build_buffer_load(&ctx.ac,
                                                             ctx.gsvs_ring[0], 1,
-                                                            uint->zero, voffset,
+                                                            ctx.i32_0, voffset,
                                                             soffset, 0, 1, 1, true);
                        }
                }
@@ -6453,14 +6569,14 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
        /* Dump LLVM IR before any optimization passes */
        if (sscreen->b.debug_flags & DBG_PREOPT_IR &&
            r600_can_dump_shader(&sscreen->b, PIPE_SHADER_GEOMETRY))
-               ac_dump_module(bld_base->base.gallivm->module);
+               ac_dump_module(ctx.gallivm.module);
 
        si_llvm_finalize_module(&ctx,
                r600_extra_shader_checks(&sscreen->b, PIPE_SHADER_GEOMETRY));
 
        r = si_compile_llvm(sscreen, &ctx.shader->binary,
                            &ctx.shader->config, ctx.tm,
-                           bld_base->base.gallivm->module,
+                           ctx.gallivm.module,
                            debug, PIPE_SHADER_GEOMETRY,
                            "GS Copy Shader");
        if (!r) {
@@ -6635,6 +6751,12 @@ static void si_init_shader_ctx(struct si_shader_context *ctx,
        bld_base->op_actions[TGSI_OPCODE_VOTE_ALL].emit = vote_all_emit;
        bld_base->op_actions[TGSI_OPCODE_VOTE_ANY].emit = vote_any_emit;
        bld_base->op_actions[TGSI_OPCODE_VOTE_EQ].emit = vote_eq_emit;
+       bld_base->op_actions[TGSI_OPCODE_BALLOT].emit = ballot_emit;
+       bld_base->op_actions[TGSI_OPCODE_READ_FIRST].intr_name = "llvm.amdgcn.readfirstlane";
+       bld_base->op_actions[TGSI_OPCODE_READ_FIRST].emit = read_lane_emit;
+       bld_base->op_actions[TGSI_OPCODE_READ_INVOC].intr_name = "llvm.amdgcn.readlane";
+       bld_base->op_actions[TGSI_OPCODE_READ_INVOC].fetch_args = read_invoc_fetch_args;
+       bld_base->op_actions[TGSI_OPCODE_READ_INVOC].emit = read_lane_emit;
 
        bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
        bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
@@ -6876,7 +6998,7 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
                int i;
                for (i = 0; i < 4; i++) {
                        ctx->gs_next_vertex[i] =
-                               lp_build_alloca(bld_base->base.gallivm,
+                               lp_build_alloca(&ctx->gallivm,
                                                ctx->i32, "");
                }
        }
@@ -7356,7 +7478,6 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 {
        struct si_shader_selector *sel = shader->selector;
        struct si_shader_context ctx;
-       struct lp_build_tgsi_context *bld_base;
        LLVMModuleRef mod;
        int r = -1;
 
@@ -7376,7 +7497,6 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 
        shader->info.uses_instanceid = sel->info.uses_instanceid;
 
-       bld_base = &ctx.bld_base;
        ctx.load_system_value = declare_system_value;
 
        if (!si_compile_tgsi_main(&ctx, shader)) {
@@ -7469,7 +7589,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
                si_build_wrapper_function(&ctx, parts, need_prolog ? 3 : 2, need_prolog ? 1 : 0);
        }
 
-       mod = bld_base->base.gallivm->module;
+       mod = ctx.gallivm.module;
 
        /* Dump LLVM IR before any optimization passes */
        if (sscreen->b.debug_flags & DBG_PREOPT_IR &&