radeonsi: pass llvm type to si_llvm_emit_fetch_64bit()
[mesa.git] / src / gallium / drivers / radeonsi / si_shader.c
index 16036e3163792ed9332245bc6cf68bd65edbba5e..c3b5f58cd26a9bc788bdc0aea401f898a76f8ebd 100644 (file)
@@ -105,7 +105,7 @@ enum {
 
 static bool is_merged_shader(struct si_shader *shader)
 {
-       if (shader->selector->screen->b.chip_class <= VI)
+       if (shader->selector->screen->info.chip_class <= VI)
                return false;
 
        return shader->key.as_ls ||
@@ -407,7 +407,7 @@ static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx)
                return LLVMConstInt(ctx->i32, stride * 4, 0);
 
        case PIPE_SHADER_TESS_CTRL:
-               if (ctx->screen->b.chip_class >= GFX9 &&
+               if (ctx->screen->info.chip_class >= GFX9 &&
                    ctx->shader->is_monolithic) {
                        stride = util_last_bit64(ctx->shader->key.part.tcs.ls->outputs_written);
                        return LLVMConstInt(ctx->i32, stride * 4, 0);
@@ -1063,7 +1063,8 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
        value2 = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset,
                                   swizzle * 4 + 4, 1, 0, can_speculate, false);
 
-       return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
+       return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
+                                       value, value2);
 }
 
 /**
@@ -1096,7 +1097,8 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
 
                lo = lds_load(bld_base, TGSI_TYPE_UNSIGNED, swizzle, dw_addr);
                hi = lds_load(bld_base, TGSI_TYPE_UNSIGNED, swizzle + 1, dw_addr);
-               return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi);
+               return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
+                                               lo, hi);
        }
 
        dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
@@ -1114,13 +1116,11 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
  * \param dw_addr      address in dwords
  * \param value                value to store
  */
-static void lds_store(struct lp_build_tgsi_context *bld_base,
+static void lds_store(struct si_shader_context *ctx,
                      unsigned dw_offset_imm, LLVMValueRef dw_addr,
                      LLVMValueRef value)
 {
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
-       dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
+       dw_addr = lp_build_add(&ctx->bld_base.uint_bld, dw_addr,
                            LLVMConstInt(ctx->i32, dw_offset_imm, 0));
 
        ac_lds_store(&ctx->ac, dw_addr, value);
@@ -1266,7 +1266,7 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
 
                /* Skip LDS stores if there is no LDS read of this output. */
                if (!skip_lds_store)
-                       lds_store(bld_base, chan_index, dw_addr, value);
+                       lds_store(ctx, chan_index, dw_addr, value);
 
                value = ac_to_integer(&ctx->ac, value);
                values[chan_index] = value;
@@ -1323,7 +1323,7 @@ static LLVMValueRef fetch_input_gs(
        param = si_shader_io_get_unique_index(semantic_name, semantic_index);
 
        /* GFX9 has the ESGS ring in LDS. */
-       if (ctx->screen->b.chip_class >= GFX9) {
+       if (ctx->screen->info.chip_class >= GFX9) {
                unsigned index = reg->Dimension.Index;
 
                switch (index / 2) {
@@ -1377,7 +1377,7 @@ static LLVMValueRef fetch_input_gs(
                value2 = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1,
                                              ctx->i32_0, vtx_offset, soffset,
                                              0, 1, 0, true, false);
-               return si_llvm_emit_fetch_64bit(bld_base, type,
+               return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
                                                value, value2);
        }
        return bitcast(bld_base, type, value);
@@ -1980,7 +1980,8 @@ static LLVMValueRef fetch_constant(
 
                lo = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, swizzle);
                hi = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, swizzle + 1);
-               return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi);
+               return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
+                                               lo, hi);
        }
 
        idx = reg->Register.Index * 4 + swizzle;
@@ -2009,7 +2010,7 @@ static LLVMValueRef fetch_constant(
                 * s_buffer_load_dword (that we have to prevent) is when we use use
                 * a literal offset where we don't need bounds checking.
                 */
-               if (ctx->screen->b.chip_class == SI &&
+               if (ctx->screen->info.chip_class == SI &&
                     HAVE_LLVM < 0x0600 &&
                     !reg->Register.Indirect) {
                        addr = LLVMBuildLShr(ctx->ac.builder, addr, LLVMConstInt(ctx->i32, 2, 0), "");
@@ -2083,13 +2084,12 @@ static LLVMValueRef si_llvm_pack_two_int32_as_int16(struct si_shader_context *ct
 }
 
 /* Initialize arguments for the shader export intrinsic */
-static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
+static void si_llvm_init_export_args(struct si_shader_context *ctx,
                                     LLVMValueRef *values,
                                     unsigned target,
                                     struct ac_export_args *args)
 {
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct lp_build_context *base = &bld_base->base;
+       LLVMValueRef f32undef = LLVMGetUndef(ctx->ac.f32);
        LLVMBuilderRef builder = ctx->ac.builder;
        LLVMValueRef val[4];
        unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
@@ -2120,10 +2120,10 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
        }
 
        args->compr = false;
-       args->out[0] = base->undef;
-       args->out[1] = base->undef;
-       args->out[2] = base->undef;
-       args->out[3] = base->undef;
+       args->out[0] = f32undef;
+       args->out[1] = f32undef;
+       args->out[2] = f32undef;
+       args->out[3] = f32undef;
 
        switch (spi_shader_col_format) {
        case V_028714_SPI_SHADER_ZERO:
@@ -2182,10 +2182,10 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
        case V_028714_SPI_SHADER_SNORM16_ABGR:
                for (chan = 0; chan < 4; chan++) {
                        /* Clamp between [-1, 1]. */
-                       val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MIN,
+                       val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_MIN,
                                                              values[chan],
                                                              LLVMConstReal(ctx->f32, 1));
-                       val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
+                       val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_MAX,
                                                              val[chan],
                                                              LLVMConstReal(ctx->f32, -1));
                        /* Convert to a signed integer in [-32767, 32767]. */
@@ -2215,7 +2215,7 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
                /* Clamp. */
                for (chan = 0; chan < 4; chan++) {
                        val[chan] = ac_to_integer(&ctx->ac, values[chan]);
-                       val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_UMIN,
+                       val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_UMIN,
                                        val[chan],
                                        chan == 3 ? max_alpha : max_rgb);
                }
@@ -2239,10 +2239,10 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
                /* Clamp. */
                for (chan = 0; chan < 4; chan++) {
                        val[chan] = ac_to_integer(&ctx->ac, values[chan]);
-                       val[chan] = lp_build_emit_llvm_binary(bld_base,
+                       val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base,
                                        TGSI_OPCODE_IMIN,
                                        val[chan], chan == 3 ? max_alpha : max_rgb);
-                       val[chan] = lp_build_emit_llvm_binary(bld_base,
+                       val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base,
                                        TGSI_OPCODE_IMAX,
                                        val[chan], chan == 3 ? min_alpha : min_rgb);
                }
@@ -2312,11 +2312,9 @@ static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *
        return LLVMBuildFMul(ctx->ac.builder, alpha, coverage, "");
 }
 
-static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context *bld_base,
+static void si_llvm_emit_clipvertex(struct si_shader_context *ctx,
                                    struct ac_export_args *pos, LLVMValueRef *out_elts)
 {
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct lp_build_context *base = &bld_base->base;
        unsigned reg_index;
        unsigned chan;
        unsigned const_chan;
@@ -2343,8 +2341,8 @@ static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context *bld_base,
                                base_elt = buffer_load_const(ctx, const_resource,
                                                             addr);
                                args->out[chan] =
-                                       lp_build_add(base, args->out[chan],
-                                                    lp_build_mul(base, base_elt,
+                                       lp_build_add(&ctx->bld_base.base, args->out[chan],
+                                                    lp_build_mul(&ctx->bld_base.base, base_elt,
                                                                  out_elts[const_chan]));
                        }
                }
@@ -2514,7 +2512,7 @@ static void si_export_param(struct si_shader_context *ctx, unsigned index,
 {
        struct ac_export_args args;
 
-       si_llvm_init_export_args(&ctx->bld_base, values,
+       si_llvm_init_export_args(ctx, values,
                                 V_008DFC_SQ_EXP_PARAM + index, &args);
        ac_build_export(&ctx->ac, &args);
 }
@@ -2567,11 +2565,10 @@ static void si_build_param_exports(struct si_shader_context *ctx,
 }
 
 /* Generate export instructions for hardware VS shader stage */
-static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
+static void si_llvm_export_vs(struct si_shader_context *ctx,
                              struct si_shader_output_values *outputs,
                              unsigned noutput)
 {
-       struct si_shader_context *ctx = si_shader_context(bld_base);
        struct si_shader *shader = ctx->shader;
        struct ac_export_args pos_args[4] = {};
        LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL, viewport_index_value = NULL;
@@ -2582,7 +2579,7 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
        for (i = 0; i < noutput; i++) {
                switch (outputs[i].semantic_name) {
                case TGSI_SEMANTIC_POSITION:
-                       si_llvm_init_export_args(bld_base, outputs[i].values,
+                       si_llvm_init_export_args(ctx, outputs[i].values,
                                                 V_008DFC_SQ_EXP_POS, &pos_args[0]);
                        break;
                case TGSI_SEMANTIC_PSIZE:
@@ -2600,14 +2597,14 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
                case TGSI_SEMANTIC_CLIPDIST:
                        if (!shader->key.opt.clip_disable) {
                                unsigned index = 2 + outputs[i].semantic_index;
-                               si_llvm_init_export_args(bld_base, outputs[i].values,
+                               si_llvm_init_export_args(ctx, outputs[i].values,
                                                         V_008DFC_SQ_EXP_POS + index,
                                                         &pos_args[index]);
                        }
                        break;
                case TGSI_SEMANTIC_CLIPVERTEX:
                        if (!shader->key.opt.clip_disable) {
-                               si_llvm_emit_clipvertex(bld_base, pos_args,
+                               si_llvm_emit_clipvertex(ctx, pos_args,
                                                        outputs[i].values);
                        }
                        break;
@@ -2662,7 +2659,7 @@ static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
                        pos_args[1].out[1] = ac_to_float(&ctx->ac, edgeflag_value);
                }
 
-               if (ctx->screen->b.chip_class >= GFX9) {
+               if (ctx->screen->info.chip_class >= GFX9) {
                        /* GFX9 has the layer in out.z[10:0] and the viewport
                         * index in out.z[19:16].
                         */
@@ -2874,7 +2871,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
 
        /* Store the dynamic HS control word. */
        offset = 0;
-       if (ctx->screen->b.chip_class <= VI) {
+       if (ctx->screen->info.chip_class <= VI) {
                ac_build_buffer_store_dword(&ctx->ac, buffer,
                                            LLVMConstInt(ctx->i32, 0x80000000, 0),
                                            1, ctx->i32_0, tf_base,
@@ -2981,7 +2978,7 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
        invocation_id = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5);
        tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
 
-       if (ctx->screen->b.chip_class >= GFX9) {
+       if (ctx->screen->info.chip_class >= GFX9) {
                LLVMBasicBlockRef blocks[2] = {
                        LLVMGetInsertBlock(builder),
                        ctx->merged_wrap_if_state.entry_block
@@ -3007,7 +3004,7 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
        LLVMValueRef ret = ctx->return_value;
        unsigned vgpr;
 
-       if (ctx->screen->b.chip_class >= GFX9) {
+       if (ctx->screen->info.chip_class >= GFX9) {
                ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout,
                                          8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
                ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_addr_base64k,
@@ -3133,9 +3130,11 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
        ctx->return_value = ret;
 }
 
-static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
+static void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi,
+                                    unsigned max_outputs,
+                                    LLVMValueRef *addrs)
 {
-       struct si_shader_context *ctx = si_shader_context(bld_base);
+       struct si_shader_context *ctx = si_shader_context_from_abi(abi);
        struct si_shader *shader = ctx->shader;
        struct tgsi_shader_info *info = &shader->selector->info;
        unsigned i, chan;
@@ -3148,7 +3147,6 @@ static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
        /* Write outputs to LDS. The next shader (TCS aka HS) will read
         * its inputs from it. */
        for (i = 0; i < info->num_outputs; i++) {
-               LLVMValueRef *out_ptr = ctx->outputs[i];
                unsigned name = info->output_semantic_name[i];
                unsigned index = info->output_semantic_index[i];
 
@@ -3179,18 +3177,20 @@ static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
                        if (!(info->output_usagemask[i] & (1 << chan)))
                                continue;
 
-                       lds_store(bld_base, chan, dw_addr,
-                                 LLVMBuildLoad(ctx->ac.builder, out_ptr[chan], ""));
+                       lds_store(ctx, chan, dw_addr,
+                                 LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], ""));
                }
        }
 
-       if (ctx->screen->b.chip_class >= GFX9)
+       if (ctx->screen->info.chip_class >= GFX9)
                si_set_ls_return_value_for_tcs(ctx);
 }
 
-static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base)
+static void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi,
+                                    unsigned max_outputs,
+                                    LLVMValueRef *addrs)
 {
-       struct si_shader_context *ctx = si_shader_context(bld_base);
+       struct si_shader_context *ctx = si_shader_context_from_abi(abi);
        struct si_shader *es = ctx->shader;
        struct tgsi_shader_info *info = &es->selector->info;
        LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
@@ -3199,7 +3199,7 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base)
        unsigned chan;
        int i;
 
-       if (ctx->screen->b.chip_class >= GFX9 && info->num_outputs) {
+       if (ctx->screen->info.chip_class >= GFX9 && info->num_outputs) {
                unsigned itemsize_dw = es->selector->esgs_itemsize / 4;
                LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac);
                LLVMValueRef wave_idx = unpack_param(ctx, ctx->param_merged_wave_info, 24, 4);
@@ -3211,7 +3211,6 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base)
        }
 
        for (i = 0; i < info->num_outputs; i++) {
-               LLVMValueRef *out_ptr = ctx->outputs[i];
                int param;
 
                if (info->output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX ||
@@ -3222,12 +3221,12 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base)
                                                      info->output_semantic_index[i]);
 
                for (chan = 0; chan < 4; chan++) {
-                       LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[chan], "");
+                       LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
                        out_val = ac_to_integer(&ctx->ac, out_val);
 
                        /* GFX9 has the ESGS ring in LDS. */
-                       if (ctx->screen->b.chip_class >= GFX9) {
-                               lds_store(bld_base, param * 4 + chan, lds_base, out_val);
+                       if (ctx->screen->info.chip_class >= GFX9) {
+                               lds_store(ctx, param * 4 + chan, lds_base, out_val);
                                continue;
                        }
 
@@ -3239,29 +3238,45 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base)
                }
        }
 
-       if (ctx->screen->b.chip_class >= GFX9)
+       if (ctx->screen->info.chip_class >= GFX9)
                si_set_es_return_value_for_gs(ctx);
 }
 
 static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx)
 {
-       if (ctx->screen->b.chip_class >= GFX9)
+       if (ctx->screen->info.chip_class >= GFX9)
                return unpack_param(ctx, ctx->param_merged_wave_info, 16, 8);
        else
                return LLVMGetParam(ctx->main_fn, ctx->param_gs_wave_id);
 }
 
-static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
+static void emit_gs_epilogue(struct si_shader_context *ctx)
 {
-       struct si_shader_context *ctx = si_shader_context(bld_base);
-
        ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE,
                         si_get_gs_wave_id(ctx));
 
-       if (ctx->screen->b.chip_class >= GFX9)
+       if (ctx->screen->info.chip_class >= GFX9)
                lp_build_endif(&ctx->merged_wrap_if_state);
 }
 
+static void si_llvm_emit_gs_epilogue(struct ac_shader_abi *abi,
+                                    unsigned max_outputs,
+                                    LLVMValueRef *addrs)
+{
+       struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+       struct tgsi_shader_info UNUSED *info = &ctx->shader->selector->info;
+
+       assert(info->num_outputs <= max_outputs);
+
+       emit_gs_epilogue(ctx);
+}
+
+static void si_tgsi_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
+{
+       struct si_shader_context *ctx = si_shader_context(bld_base);
+       emit_gs_epilogue(ctx);
+}
+
 static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi,
                                     unsigned max_outputs,
                                     LLVMValueRef *addrs)
@@ -3344,7 +3359,7 @@ static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi,
                i++;
        }
 
-       si_llvm_export_vs(&ctx->bld_base, outputs, i);
+       si_llvm_export_vs(ctx, outputs, i);
        FREE(outputs);
 }
 
@@ -3440,9 +3455,9 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
 
        /* SI (except OLAND and HAINAN) has a bug that it only looks
         * at the X writemask component. */
-       if (ctx->screen->b.chip_class == SI &&
-           ctx->screen->b.family != CHIP_OLAND &&
-           ctx->screen->b.family != CHIP_HAINAN)
+       if (ctx->screen->info.chip_class == SI &&
+           ctx->screen->info.family != CHIP_OLAND &&
+           ctx->screen->info.family != CHIP_HAINAN)
                mask |= 0x1;
 
        /* Specify which components to enable */
@@ -3485,7 +3500,7 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
 
                /* Get the export arguments, also find out what the last one is. */
                for (c = 0; c <= ctx->shader->key.part.ps.epilog.last_cbuf; c++) {
-                       si_llvm_init_export_args(bld_base, color,
+                       si_llvm_init_export_args(ctx, color,
                                                 V_008DFC_SQ_EXP_MRT + c, &args[c]);
                        if (args[c].enabled_channels)
                                last = c;
@@ -3505,7 +3520,7 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
                struct ac_export_args args;
 
                /* Export */
-               si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + index,
+               si_llvm_init_export_args(ctx, color, V_008DFC_SQ_EXP_MRT + index,
                                         &args);
                if (is_last) {
                        args.valid_mask = 1; /* whether the EXEC mask is valid */
@@ -4156,7 +4171,7 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
         * The real barrier instruction isn’t needed, because an entire patch
         * always fits into a single wave.
         */
-       if (ctx->screen->b.chip_class == SI &&
+       if (ctx->screen->info.chip_class == SI &&
            ctx->type == PIPE_SHADER_TESS_CTRL) {
                si_emit_waitcnt(ctx, LGKM_CNT & VM_CNT);
                return;
@@ -4215,7 +4230,7 @@ static void si_create_function(struct si_shader_context *ctx,
                                           "no-signed-zeros-fp-math",
                                           "true");
 
-       if (ctx->screen->b.debug_flags & DBG(UNSAFE_MATH)) {
+       if (ctx->screen->debug_flags & DBG(UNSAFE_MATH)) {
                /* These were copied from some LLVM test. */
                LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
                                                   "less-precise-fpmad",
@@ -4262,10 +4277,10 @@ static unsigned si_get_max_workgroup_size(const struct si_shader *shader)
        case PIPE_SHADER_TESS_CTRL:
                /* Return this so that LLVM doesn't remove s_barrier
                 * instructions on chips where we use s_barrier. */
-               return shader->selector->screen->b.chip_class >= CIK ? 128 : 64;
+               return shader->selector->screen->info.chip_class >= CIK ? 128 : 64;
 
        case PIPE_SHADER_GEOMETRY:
-               return shader->selector->screen->b.chip_class >= GFX9 ? 128 : 64;
+               return shader->selector->screen->info.chip_class >= GFX9 ? 128 : 64;
 
        case PIPE_SHADER_COMPUTE:
                break; /* see below */
@@ -4391,7 +4406,7 @@ static void create_function(struct si_shader_context *ctx)
        si_init_function_info(&fninfo);
 
        /* Set MERGED shaders. */
-       if (ctx->screen->b.chip_class >= GFX9) {
+       if (ctx->screen->info.chip_class >= GFX9) {
                if (shader->key.as_ls || type == PIPE_SHADER_TESS_CTRL)
                        type = SI_SHADER_MERGED_VERTEX_TESSCTRL; /* LS or HS */
                else if (shader->key.as_es || type == PIPE_SHADER_GEOMETRY)
@@ -4433,10 +4448,8 @@ static void create_function(struct si_shader_context *ctx)
                declare_vs_specific_input_sgprs(ctx, &fninfo);
 
                if (shader->key.as_es) {
-                       assert(!shader->selector->nir);
                        ctx->param_es2gs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
                } else if (shader->key.as_ls) {
-                       assert(!shader->selector->nir);
                        /* no extra parameters */
                } else {
                        if (shader->is_gs_copy_shader) {
@@ -4743,9 +4756,7 @@ static void create_function(struct si_shader_context *ctx)
        if (shader->key.as_ls ||
            ctx->type == PIPE_SHADER_TESS_CTRL ||
            /* GFX9 has the ESGS ring buffer in LDS. */
-           (ctx->screen->b.chip_class >= GFX9 &&
-            (shader->key.as_es ||
-             ctx->type == PIPE_SHADER_GEOMETRY)))
+           type == SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY)
                ac_declare_lds_as_pointer(&ctx->ac);
 }
 
@@ -4760,7 +4771,7 @@ static void preload_ring_buffers(struct si_shader_context *ctx)
        LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn,
                                            ctx->param_rw_buffers);
 
-       if (ctx->screen->b.chip_class <= VI &&
+       if (ctx->screen->info.chip_class <= VI &&
            (ctx->shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY)) {
                unsigned ring =
                        ctx->type == PIPE_SHADER_GEOMETRY ? SI_GS_RING_ESGS
@@ -5023,14 +5034,14 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
 
        r600_resource_reference(&shader->bo, NULL);
        shader->bo = (struct r600_resource*)
-                    pipe_buffer_create(&sscreen->b.b, 0,
+                    pipe_buffer_create(&sscreen->b, 0,
                                        PIPE_USAGE_IMMUTABLE,
                                        align(bo_size, SI_CPDMA_ALIGNMENT));
        if (!shader->bo)
                return -ENOMEM;
 
        /* Upload. */
-       ptr = sscreen->b.ws->buffer_map(shader->bo->buf, NULL,
+       ptr = sscreen->ws->buffer_map(shader->bo->buf, NULL,
                                        PIPE_TRANSFER_READ_WRITE |
                                        PIPE_TRANSFER_UNSYNCHRONIZED);
 
@@ -5057,7 +5068,7 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
        else if (mainb->rodata_size > 0)
                memcpy(ptr, mainb->rodata, mainb->rodata_size);
 
-       sscreen->b.ws->buffer_unmap(shader->bo->buf);
+       sscreen->ws->buffer_unmap(shader->bo->buf);
        return 0;
 }
 
@@ -5119,11 +5130,11 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
        const struct si_shader_config *conf = &shader->config;
        unsigned num_inputs = shader->selector ? shader->selector->info.num_inputs : 0;
        unsigned code_size = si_get_shader_binary_size(shader);
-       unsigned lds_increment = sscreen->b.chip_class >= CIK ? 512 : 256;
+       unsigned lds_increment = sscreen->info.chip_class >= CIK ? 512 : 256;
        unsigned lds_per_wave = 0;
        unsigned max_simd_waves;
 
-       switch (sscreen->b.family) {
+       switch (sscreen->info.family) {
        /* These always have 8 waves: */
        case CHIP_POLARIS10:
        case CHIP_POLARIS11:
@@ -5162,7 +5173,7 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
 
        /* Compute the per-SIMD wave counts. */
        if (conf->num_sgprs) {
-               if (sscreen->b.chip_class >= VI)
+               if (sscreen->info.chip_class >= VI)
                        max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs);
                else
                        max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs);
@@ -5177,7 +5188,7 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
                max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
 
        if (!check_debug_option ||
-           si_can_dump_shader(&sscreen->b, processor)) {
+           si_can_dump_shader(sscreen, processor)) {
                if (processor == PIPE_SHADER_FRAGMENT) {
                        fprintf(file, "*** SHADER CONFIG ***\n"
                                "SPI_PS_INPUT_ADDR = 0x%04x\n"
@@ -5249,7 +5260,7 @@ void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
                    FILE *file, bool check_debug_option)
 {
        if (!check_debug_option ||
-           si_can_dump_shader(&sscreen->b, processor))
+           si_can_dump_shader(sscreen, processor))
                si_dump_shader_key(processor, shader, file);
 
        if (!check_debug_option && shader->binary.llvm_ir_string) {
@@ -5266,8 +5277,8 @@ void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
        }
 
        if (!check_debug_option ||
-           (si_can_dump_shader(&sscreen->b, processor) &&
-            !(sscreen->b.debug_flags & DBG(NO_ASM)))) {
+           (si_can_dump_shader(sscreen, processor) &&
+            !(sscreen->debug_flags & DBG(NO_ASM)))) {
                fprintf(file, "\n%s:\n", si_get_shader_name(shader, processor));
 
                if (shader->prolog)
@@ -5302,12 +5313,12 @@ static int si_compile_llvm(struct si_screen *sscreen,
                           const char *name)
 {
        int r = 0;
-       unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations);
+       unsigned count = p_atomic_inc_return(&sscreen->num_compilations);
 
-       if (si_can_dump_shader(&sscreen->b, processor)) {
+       if (si_can_dump_shader(sscreen, processor)) {
                fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
 
-               if (!(sscreen->b.debug_flags & (DBG(NO_IR) | DBG(PREOPT_IR)))) {
+               if (!(sscreen->debug_flags & (DBG(NO_IR) | DBG(PREOPT_IR)))) {
                        fprintf(stderr, "%s LLVM IR:\n\n", name);
                        ac_dump_module(mod);
                        fprintf(stderr, "\n");
@@ -5486,7 +5497,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
                }
 
                if (stream == 0)
-                       si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs);
+                       si_llvm_export_vs(&ctx, outputs, gsinfo->num_outputs);
 
                LLVMBuildBr(builder, end_bb);
        }
@@ -5504,7 +5515,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
                            debug, PIPE_SHADER_GEOMETRY,
                            "GS Copy Shader");
        if (!r) {
-               if (si_can_dump_shader(&sscreen->b, PIPE_SHADER_GEOMETRY))
+               if (si_can_dump_shader(sscreen, PIPE_SHADER_GEOMETRY))
                        fprintf(stderr, "GS Copy Shader:\n");
                si_shader_dump(sscreen, ctx.shader, debug,
                               PIPE_SHADER_GEOMETRY, stderr, true);
@@ -5557,7 +5568,7 @@ static void si_dump_shader_key(unsigned processor, const struct si_shader *shade
                break;
 
        case PIPE_SHADER_TESS_CTRL:
-               if (shader->selector->screen->b.chip_class >= GFX9) {
+               if (shader->selector->screen->info.chip_class >= GFX9) {
                        si_dump_shader_key_vs(key, &key->part.tcs.ls_prolog,
                                              "part.tcs.ls_prolog", f);
                }
@@ -5575,7 +5586,7 @@ static void si_dump_shader_key(unsigned processor, const struct si_shader *shade
                if (shader->is_gs_copy_shader)
                        break;
 
-               if (shader->selector->screen->b.chip_class >= GFX9 &&
+               if (shader->selector->screen->info.chip_class >= GFX9 &&
                    key->part.gs.es->type == PIPE_SHADER_VERTEX) {
                        si_dump_shader_key_vs(key, &key->part.gs.vs_prolog,
                                              "part.gs.vs_prolog", f);
@@ -5742,13 +5753,12 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
        case PIPE_SHADER_VERTEX:
                ctx->load_input = declare_input_vs;
                if (shader->key.as_ls)
-                       bld_base->emit_epilogue = si_llvm_emit_ls_epilogue;
+                       ctx->abi.emit_outputs = si_llvm_emit_ls_epilogue;
                else if (shader->key.as_es)
-                       bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
-               else {
+                       ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
+               else
                        ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
-                       bld_base->emit_epilogue = si_tgsi_emit_epilogue;
-               }
+               bld_base->emit_epilogue = si_tgsi_emit_epilogue;
                break;
        case PIPE_SHADER_TESS_CTRL:
                bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tcs;
@@ -5759,16 +5769,16 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
        case PIPE_SHADER_TESS_EVAL:
                bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
                if (shader->key.as_es)
-                       bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
-               else {
+                       ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
+               else
                        ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
-                       bld_base->emit_epilogue = si_tgsi_emit_epilogue;
-               }
+               bld_base->emit_epilogue = si_tgsi_emit_epilogue;
                break;
        case PIPE_SHADER_GEOMETRY:
                bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
                ctx->abi.emit_vertex = si_llvm_emit_vertex;
-               bld_base->emit_epilogue = si_llvm_emit_gs_epilogue;
+               ctx->abi.emit_outputs = si_llvm_emit_gs_epilogue;
+               bld_base->emit_epilogue = si_tgsi_emit_gs_epilogue;
                break;
        case PIPE_SHADER_FRAGMENT:
                ctx->load_input = declare_input_fs;
@@ -5800,7 +5810,7 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
         * For monolithic merged shaders, the first shader is wrapped in an
         * if-block together with its prolog in si_build_wrapper_function.
         */
-       if (ctx->screen->b.chip_class >= GFX9) {
+       if (ctx->screen->info.chip_class >= GFX9) {
                if (!is_monolithic &&
                    sel->info.num_instructions > 1 && /* not empty shader */
                    (shader->key.as_es || shader->key.as_ls) &&
@@ -6074,7 +6084,7 @@ static void si_build_gs_prolog_function(struct si_shader_context *ctx,
 
        si_init_function_info(&fninfo);
 
-       if (ctx->screen->b.chip_class >= GFX9) {
+       if (ctx->screen->info.chip_class >= GFX9) {
                num_sgprs = 8 + GFX9_GS_NUM_USER_SGPR;
                num_vgprs = 5; /* ES inputs are not needed by GS */
        } else {
@@ -6101,7 +6111,7 @@ static void si_build_gs_prolog_function(struct si_shader_context *ctx,
         * with registers here. The main shader part will set the correct EXEC
         * mask.
         */
-       if (ctx->screen->b.chip_class >= GFX9 && !key->gs_prolog.is_monolithic)
+       if (ctx->screen->info.chip_class >= GFX9 && !key->gs_prolog.is_monolithic)
                si_init_exec_full_mask(ctx);
 
        /* Copy inputs to outputs. This should be no-op, as the registers match,
@@ -6136,7 +6146,7 @@ static void si_build_gs_prolog_function(struct si_shader_context *ctx,
                LLVMValueRef vtx_in[6], vtx_out[6];
                LLVMValueRef prim_id, rotate;
 
-               if (ctx->screen->b.chip_class >= GFX9) {
+               if (ctx->screen->info.chip_class >= GFX9) {
                        for (unsigned i = 0; i < 3; i++) {
                                vtx_in[i*2] = unpack_param(ctx, gfx9_vtx_params[i], 0, 16);
                                vtx_in[i*2+1] = unpack_param(ctx, gfx9_vtx_params[i], 16, 16);
@@ -6156,7 +6166,7 @@ static void si_build_gs_prolog_function(struct si_shader_context *ctx,
                        vtx_out[i] = LLVMBuildSelect(builder, rotate, rotated, base, "");
                }
 
-               if (ctx->screen->b.chip_class >= GFX9) {
+               if (ctx->screen->info.chip_class >= GFX9) {
                        for (unsigned i = 0; i < 3; i++) {
                                LLVMValueRef hi, out;
 
@@ -6422,8 +6432,8 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 
        /* Dump TGSI code before doing TGSI->LLVM conversion in case the
         * conversion fails. */
-       if (si_can_dump_shader(&sscreen->b, sel->info.processor) &&
-           !(sscreen->b.debug_flags & DBG(NO_TGSI))) {
+       if (si_can_dump_shader(sscreen, sel->info.processor) &&
+           !(sscreen->debug_flags & DBG(NO_TGSI))) {
                if (sel->tokens)
                        tgsi_dump(sel->tokens, 0);
                else
@@ -6464,7 +6474,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
                si_build_wrapper_function(&ctx, parts + !need_prolog,
                                          1 + need_prolog, need_prolog, 0);
        } else if (is_monolithic && ctx.type == PIPE_SHADER_TESS_CTRL) {
-               if (sscreen->b.chip_class >= GFX9) {
+               if (sscreen->info.chip_class >= GFX9) {
                        struct si_shader_selector *ls = shader->key.part.tcs.ls;
                        LLVMValueRef parts[4];
                        bool vs_needs_prolog =
@@ -6529,7 +6539,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
                        si_build_wrapper_function(&ctx, parts, 2, 0, 0);
                }
        } else if (is_monolithic && ctx.type == PIPE_SHADER_GEOMETRY) {
-               if (ctx.screen->b.chip_class >= GFX9) {
+               if (ctx.screen->info.chip_class >= GFX9) {
                        struct si_shader_selector *es = shader->key.part.gs.es;
                        LLVMValueRef es_prolog = NULL;
                        LLVMValueRef es_main = NULL;
@@ -6549,7 +6559,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
                                union si_shader_part_key vs_prolog_key;
                                si_get_vs_prolog_key(&es->info,
                                                     shader->info.num_input_sgprs,
-                                                    &shader->key.part.tcs.ls_prolog,
+                                                    &shader->key.part.gs.vs_prolog,
                                                     shader, &vs_prolog_key);
                                vs_prolog_key.vs_prolog.is_monolithic = true;
                                si_build_vs_prolog_function(&ctx, &vs_prolog_key);
@@ -6631,7 +6641,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
        si_optimize_vs_outputs(&ctx);
 
        if ((debug && debug->debug_message) ||
-           si_can_dump_shader(&sscreen->b, ctx.type))
+           si_can_dump_shader(sscreen, ctx.type))
                si_count_scratch_private_memory(&ctx);
 
        /* Compile to bytecode. */
@@ -6649,7 +6659,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
        if (sel->type == PIPE_SHADER_COMPUTE) {
                unsigned wave_size = 64;
                unsigned max_vgprs = 256;
-               unsigned max_sgprs = sscreen->b.chip_class >= VI ? 800 : 512;
+               unsigned max_sgprs = sscreen->info.chip_class >= VI ? 800 : 512;
                unsigned max_sgprs_per_wave = 128;
                unsigned max_block_threads = si_get_max_workgroup_size(shader);
                unsigned min_waves_per_cu = DIV_ROUND_UP(max_block_threads, wave_size);
@@ -6820,7 +6830,7 @@ static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx)
 {
        LLVMValueRef ptr[2], list;
        bool is_merged_shader =
-               ctx->screen->b.chip_class >= GFX9 &&
+               ctx->screen->info.chip_class >= GFX9 &&
                (ctx->type == PIPE_SHADER_TESS_CTRL ||
                 ctx->type == PIPE_SHADER_GEOMETRY ||
                 ctx->shader->key.as_ls || ctx->shader->key.as_es);
@@ -7032,7 +7042,7 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
 
        si_init_function_info(&fninfo);
 
-       if (ctx->screen->b.chip_class >= GFX9) {
+       if (ctx->screen->info.chip_class >= GFX9) {
                add_arg(&fninfo, ARG_SGPR, ctx->i64);
                ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
                add_arg(&fninfo, ARG_SGPR, ctx->i32); /* wave info */
@@ -7081,7 +7091,7 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
 
        /* Create the function. */
        si_create_function(ctx, "tcs_epilog", NULL, 0, &fninfo,
-                          ctx->screen->b.chip_class >= CIK ? 128 : 64);
+                          ctx->screen->info.chip_class >= CIK ? 128 : 64);
        ac_declare_lds_as_pointer(&ctx->ac);
        func = ctx->main_fn;
 
@@ -7106,7 +7116,7 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen,
                                       struct si_shader *shader,
                                       struct pipe_debug_callback *debug)
 {
-       if (sscreen->b.chip_class >= GFX9) {
+       if (sscreen->info.chip_class >= GFX9) {
                struct si_shader *ls_main_part =
                        shader->key.part.tcs.ls->main_shader_part_ls;
 
@@ -7138,7 +7148,7 @@ static bool si_shader_select_gs_parts(struct si_screen *sscreen,
                                      struct si_shader *shader,
                                      struct pipe_debug_callback *debug)
 {
-       if (sscreen->b.chip_class >= GFX9) {
+       if (sscreen->info.chip_class >= GFX9) {
                struct si_shader *es_main_part =
                        shader->key.part.gs.es->main_shader_part_es;
 
@@ -7653,9 +7663,9 @@ void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
         *   Make sure we have at least 4k of LDS in use to avoid the bug.
         *   It applies to workgroup sizes of more than one wavefront.
         */
-       if (sscreen->b.family == CHIP_BONAIRE ||
-           sscreen->b.family == CHIP_KABINI ||
-           sscreen->b.family == CHIP_MULLINS)
+       if (sscreen->info.family == CHIP_BONAIRE ||
+           sscreen->info.family == CHIP_KABINI ||
+           sscreen->info.family == CHIP_MULLINS)
                *lds_size = MAX2(*lds_size, 8);
 }
 
@@ -7819,7 +7829,7 @@ void si_shader_destroy(struct si_shader *shader)
        r600_resource_reference(&shader->bo, NULL);
 
        if (!shader->is_binary_shared)
-               si_radeon_shader_binary_clean(&shader->binary);
+               ac_shader_binary_clean(&shader->binary);
 
        free(shader->shader_log);
 }