diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index d4c99539aab..c7d772fa652 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -27,11 +27,15 @@
 #include "radv_private.h"
 #include "radv_shader.h"
+#include "radv_shader_helper.h"
 #include "nir/nir.h"
 
 #include <llvm-c/Core.h>
 #include <llvm-c/TargetMachine.h>
 #include <llvm-c/Transforms/Scalar.h>
+#if HAVE_LLVM >= 0x0700
+#include <llvm-c/Transforms/Utils.h>
+#endif
 
 #include "sid.h"
 #include "gfx9d.h"
@@ -60,13 +64,8 @@ struct radv_shader_context {
 	LLVMValueRef vertex_buffers;
 	LLVMValueRef rel_auto_id;
 	LLVMValueRef vs_prim_id;
-	LLVMValueRef ls_out_layout;
 	LLVMValueRef es2gs_offset;
 
-	LLVMValueRef tcs_offchip_layout;
-	LLVMValueRef tcs_out_offsets;
-	LLVMValueRef tcs_out_layout;
-	LLVMValueRef tcs_in_layout;
 	LLVMValueRef oc_lds;
 	LLVMValueRef merged_wave_info;
 	LLVMValueRef tess_factor_offset;
@@ -74,8 +73,6 @@ struct radv_shader_context {
 	LLVMValueRef tes_u;
 	LLVMValueRef tes_v;
 
-	LLVMValueRef gsvs_ring_stride;
-	LLVMValueRef gsvs_num_entries;
 	LLVMValueRef gs2vs_offset;
 	LLVMValueRef gs_wave_id;
 	LLVMValueRef gs_vtx_offset[6];
@@ -85,7 +82,6 @@ struct radv_shader_context {
 	LLVMValueRef hs_ring_tess_offchip;
 	LLVMValueRef hs_ring_tess_factor;
 
-	LLVMValueRef sample_pos_offset;
 	LLVMValueRef persp_sample, persp_center, persp_centroid;
 	LLVMValueRef linear_sample, linear_center, linear_centroid;
 
@@ -103,12 +99,14 @@ struct radv_shader_context {
 	unsigned gs_max_out_vertices;
 	unsigned tes_primitive_mode;
 
-	uint64_t tess_outputs_written;
-	uint64_t tess_patch_outputs_written;
 	uint32_t tcs_patch_outputs_read;
 	uint64_t tcs_outputs_read;
 	uint32_t tcs_vertices_per_patch;
+	uint32_t tcs_num_inputs;
+	uint32_t tcs_num_patches;
+	uint32_t max_gsvs_emit_size;
+	uint32_t gsvs_vertex_size;
 };
 
 enum radeon_llvm_calling_convention {
@@ -126,6 +124,98 @@ radv_shader_context_from_abi(struct ac_shader_abi *abi)
 	return container_of(abi, ctx, abi);
 }
 
+struct ac_build_if_state
+{
+	struct radv_shader_context *ctx;
+	LLVMValueRef condition;
+	LLVMBasicBlockRef entry_block;
+	LLVMBasicBlockRef true_block;
+	LLVMBasicBlockRef false_block;
+	LLVMBasicBlockRef merge_block;
+};
+
+static LLVMBasicBlockRef
+ac_build_insert_new_block(struct radv_shader_context *ctx, const char *name)
+{
+	LLVMBasicBlockRef current_block;
+	LLVMBasicBlockRef next_block;
+	LLVMBasicBlockRef new_block;
+
+	/* get current basic block */
+	current_block = LLVMGetInsertBlock(ctx->ac.builder);
+
+	/* check if there's another block after this one */
+	next_block = LLVMGetNextBasicBlock(current_block);
+	if (next_block) {
+		/* insert the new block before the next block */
+		new_block = LLVMInsertBasicBlockInContext(ctx->context, next_block, name);
+	}
+	else {
+		/* append new block after current block */
+		LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
+		new_block = LLVMAppendBasicBlockInContext(ctx->context, function, name);
+	}
+	return new_block;
+}
+
+static void
+ac_nir_build_if(struct ac_build_if_state *ifthen,
+		struct radv_shader_context *ctx,
+		LLVMValueRef condition)
+{
+	LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->ac.builder);
+
+	memset(ifthen, 0, sizeof *ifthen);
+	ifthen->ctx = ctx;
+	ifthen->condition = condition;
+	ifthen->entry_block = block;
+
+	/* create endif/merge basic block for the phi functions */
ifthen->merge_block = ac_build_insert_new_block(ctx, "endif-block"); + + /* create/insert true_block before merge_block */ + ifthen->true_block = + LLVMInsertBasicBlockInContext(ctx->context, + ifthen->merge_block, + "if-true-block"); + + /* successive code goes into the true block */ + LLVMPositionBuilderAtEnd(ctx->ac.builder, ifthen->true_block); +} + +/** + * End a conditional. + */ +static void +ac_nir_build_endif(struct ac_build_if_state *ifthen) +{ + LLVMBuilderRef builder = ifthen->ctx->ac.builder; + + /* Insert branch to the merge block from current block */ + LLVMBuildBr(builder, ifthen->merge_block); + + /* + * Now patch in the various branch instructions. + */ + + /* Insert the conditional branch instruction at the end of entry_block */ + LLVMPositionBuilderAtEnd(builder, ifthen->entry_block); + if (ifthen->false_block) { + /* we have an else clause */ + LLVMBuildCondBr(builder, ifthen->condition, + ifthen->true_block, ifthen->false_block); + } + else { + /* no else clause */ + LLVMBuildCondBr(builder, ifthen->condition, + ifthen->true_block, ifthen->merge_block); + } + + /* Resume building code at end of the ifthen->merge_block */ + LLVMPositionBuilderAtEnd(builder, ifthen->merge_block); +} + + static LLVMValueRef get_rel_patch_id(struct radv_shader_context *ctx) { switch (ctx->stage) { @@ -139,6 +229,78 @@ static LLVMValueRef get_rel_patch_id(struct radv_shader_context *ctx) } } +static unsigned +get_tcs_num_patches(struct radv_shader_context *ctx) +{ + unsigned num_tcs_input_cp = ctx->options->key.tcs.input_vertices; + unsigned num_tcs_output_cp = ctx->tcs_vertices_per_patch; + uint32_t input_vertex_size = ctx->tcs_num_inputs * 16; + uint32_t input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size; + uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written); + uint32_t num_tcs_patch_outputs = util_last_bit64(ctx->shader_info->info.tcs.patch_outputs_written); + uint32_t output_vertex_size = num_tcs_outputs * 16; + uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size; + uint32_t output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16; + unsigned num_patches; + unsigned hardware_lds_size; + + /* Ensure that we only need one wave per SIMD so we don't need to check + * resource usage. Also ensures that the number of tcs in and out + * vertices per threadgroup are at most 256. + */ + num_patches = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp) * 4; + /* Make sure that the data fits in LDS. This assumes the shaders only + * use LDS for the inputs and outputs. + */ + hardware_lds_size = ctx->options->chip_class >= CIK ? 65536 : 32768; + num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size)); + /* Make sure the output data fits in the offchip buffer */ + num_patches = MIN2(num_patches, (ctx->options->tess_offchip_block_dw_size * 4) / output_patch_size); + /* Not necessary for correctness, but improves performance. The + * specific value is taken from the proprietary driver. + */ + num_patches = MIN2(num_patches, 40); + + /* SI bug workaround - limit LS-HS threadgroups to only one wave. 
*/ + if (ctx->options->chip_class == SI) { + unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp); + num_patches = MIN2(num_patches, one_wave); + } + return num_patches; +} + +static unsigned +calculate_tess_lds_size(struct radv_shader_context *ctx) +{ + unsigned num_tcs_input_cp = ctx->options->key.tcs.input_vertices; + unsigned num_tcs_output_cp; + unsigned num_tcs_outputs, num_tcs_patch_outputs; + unsigned input_vertex_size, output_vertex_size; + unsigned input_patch_size, output_patch_size; + unsigned pervertex_output_patch_size; + unsigned output_patch0_offset; + unsigned num_patches; + unsigned lds_size; + + num_tcs_output_cp = ctx->tcs_vertices_per_patch; + num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written); + num_tcs_patch_outputs = util_last_bit64(ctx->shader_info->info.tcs.patch_outputs_written); + + input_vertex_size = ctx->tcs_num_inputs * 16; + output_vertex_size = num_tcs_outputs * 16; + + input_patch_size = num_tcs_input_cp * input_vertex_size; + + pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size; + output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16; + + num_patches = ctx->tcs_num_patches; + output_patch0_offset = input_patch_size * num_patches; + + lds_size = output_patch0_offset + output_patch_size * num_patches; + return lds_size; +} + /* Tessellation shaders pass outputs to the next shader using LDS. * * LS outputs = TCS inputs @@ -162,42 +324,66 @@ static LLVMValueRef get_rel_patch_id(struct radv_shader_context *ctx) static LLVMValueRef get_tcs_in_patch_stride(struct radv_shader_context *ctx) { - if (ctx->stage == MESA_SHADER_VERTEX) - return ac_unpack_param(&ctx->ac, ctx->ls_out_layout, 0, 13); - else if (ctx->stage == MESA_SHADER_TESS_CTRL) - return ac_unpack_param(&ctx->ac, ctx->tcs_in_layout, 0, 13); - else { - assert(0); - return NULL; - } + assert (ctx->stage == MESA_SHADER_TESS_CTRL); + uint32_t input_vertex_size = ctx->tcs_num_inputs * 16; + uint32_t input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size; + + input_patch_size /= 4; + return LLVMConstInt(ctx->ac.i32, input_patch_size, false); } static LLVMValueRef get_tcs_out_patch_stride(struct radv_shader_context *ctx) { - return ac_unpack_param(&ctx->ac, ctx->tcs_out_layout, 0, 13); + uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written); + uint32_t num_tcs_patch_outputs = util_last_bit64(ctx->shader_info->info.tcs.patch_outputs_written); + uint32_t output_vertex_size = num_tcs_outputs * 16; + uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size; + uint32_t output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16; + output_patch_size /= 4; + return LLVMConstInt(ctx->ac.i32, output_patch_size, false); } static LLVMValueRef get_tcs_out_vertex_stride(struct radv_shader_context *ctx) { - return ac_unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8); + uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written); + uint32_t output_vertex_size = num_tcs_outputs * 16; + output_vertex_size /= 4; + return LLVMConstInt(ctx->ac.i32, output_vertex_size, false); } static LLVMValueRef get_tcs_out_patch0_offset(struct radv_shader_context *ctx) { - return LLVMBuildMul(ctx->ac.builder, - ac_unpack_param(&ctx->ac, ctx->tcs_out_offsets, 0, 16), - LLVMConstInt(ctx->ac.i32, 4, false), ""); + assert (ctx->stage == MESA_SHADER_TESS_CTRL); + uint32_t input_vertex_size = ctx->tcs_num_inputs * 16; + uint32_t 
input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size; + uint32_t output_patch0_offset = input_patch_size; + unsigned num_patches = ctx->tcs_num_patches; + + output_patch0_offset *= num_patches; + output_patch0_offset /= 4; + return LLVMConstInt(ctx->ac.i32, output_patch0_offset, false); } static LLVMValueRef get_tcs_out_patch0_patch_data_offset(struct radv_shader_context *ctx) { - return LLVMBuildMul(ctx->ac.builder, - ac_unpack_param(&ctx->ac, ctx->tcs_out_offsets, 16, 16), - LLVMConstInt(ctx->ac.i32, 4, false), ""); + assert (ctx->stage == MESA_SHADER_TESS_CTRL); + uint32_t input_vertex_size = ctx->tcs_num_inputs * 16; + uint32_t input_patch_size = ctx->options->key.tcs.input_vertices * input_vertex_size; + uint32_t output_patch0_offset = input_patch_size; + + uint32_t num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written); + uint32_t output_vertex_size = num_tcs_outputs * 16; + uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size; + unsigned num_patches = ctx->tcs_num_patches; + + output_patch0_offset *= num_patches; + output_patch0_offset += pervertex_output_patch_size; + output_patch0_offset /= 4; + return LLVMConstInt(ctx->ac.i32, output_patch0_offset, false); } static LLVMValueRef @@ -294,7 +480,7 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module, unsigned num_return_elems, struct arg_info *args, unsigned max_workgroup_size, - bool unsafe_math) + const struct radv_nir_compiler_options *options) { LLVMTypeRef main_function_type, ret_type; LLVMBasicBlockRef main_function_body; @@ -325,12 +511,18 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module, } } + if (options->address32_hi) { + ac_llvm_add_target_dep_function_attr(main_function, + "amdgpu-32bit-address-high-bits", + options->address32_hi); + } + if (max_workgroup_size) { ac_llvm_add_target_dep_function_attr(main_function, "amdgpu-max-work-group-size", max_workgroup_size); } - if (unsafe_math) { + if (options->unsafe_math) { /* These were copied from some LLVM test. */ LLVMAddTargetDependentFunctionAttr(main_function, "less-precise-fpmad", @@ -374,20 +566,31 @@ set_loc_shader(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx, set_loc(ud_info, sgpr_idx, num_sgprs, 0); } +static void +set_loc_shader_ptr(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx) +{ + bool use_32bit_pointers = HAVE_32BIT_POINTERS && + idx != AC_UD_SCRATCH_RING_OFFSETS; + + set_loc_shader(ctx, idx, sgpr_idx, use_32bit_pointers ? 1 : 2); +} + static void set_loc_desc(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx, uint32_t indirect_offset) { - struct radv_userdata_info *ud_info = - &ctx->shader_info->user_sgprs_locs.descriptor_sets[idx]; + struct radv_userdata_locations *locs = + &ctx->shader_info->user_sgprs_locs; + struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx]; assert(ud_info); - set_loc(ud_info, sgpr_idx, 2, indirect_offset); + set_loc(ud_info, sgpr_idx, HAVE_32BIT_POINTERS ? 1 : 2, indirect_offset); + if (indirect_offset == 0) + locs->descriptor_sets_enabled |= 1 << idx; } struct user_sgpr_info { bool need_ring_offsets; - uint8_t sgpr_count; bool indirect_all_descriptor_sets; }; @@ -420,7 +623,8 @@ count_vs_user_sgprs(struct radv_shader_context *ctx) { uint8_t count = 0; - count += ctx->shader_info->info.vs.has_vertex_buffers ? 2 : 0; + if (ctx->shader_info->info.vs.has_vertex_buffers) + count += HAVE_32BIT_POINTERS ? 1 : 2; count += ctx->shader_info->info.vs.needs_draw_id ? 
3 : 2; return count; @@ -433,6 +637,8 @@ static void allocate_user_sgprs(struct radv_shader_context *ctx, bool needs_view_index, struct user_sgpr_info *user_sgpr_info) { + uint8_t user_sgpr_count = 0; + memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info)); /* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */ @@ -449,62 +655,53 @@ static void allocate_user_sgprs(struct radv_shader_context *ctx, /* 2 user sgprs will nearly always be allocated for scratch/rings */ if (ctx->options->supports_spill || user_sgpr_info->need_ring_offsets) { - user_sgpr_info->sgpr_count += 2; + user_sgpr_count += 2; } switch (stage) { case MESA_SHADER_COMPUTE: if (ctx->shader_info->info.cs.uses_grid_size) - user_sgpr_info->sgpr_count += 3; + user_sgpr_count += 3; break; case MESA_SHADER_FRAGMENT: - user_sgpr_info->sgpr_count += ctx->shader_info->info.ps.needs_sample_positions; + user_sgpr_count += ctx->shader_info->info.ps.needs_sample_positions; break; case MESA_SHADER_VERTEX: if (!ctx->is_gs_copy_shader) - user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx); - if (ctx->options->key.vs.as_ls) - user_sgpr_info->sgpr_count++; + user_sgpr_count += count_vs_user_sgprs(ctx); break; case MESA_SHADER_TESS_CTRL: if (has_previous_stage) { if (previous_stage == MESA_SHADER_VERTEX) - user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx); - user_sgpr_info->sgpr_count++; + user_sgpr_count += count_vs_user_sgprs(ctx); } - user_sgpr_info->sgpr_count += 4; break; case MESA_SHADER_TESS_EVAL: - user_sgpr_info->sgpr_count += 1; break; case MESA_SHADER_GEOMETRY: if (has_previous_stage) { if (previous_stage == MESA_SHADER_VERTEX) { - user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx); - } else { - user_sgpr_info->sgpr_count++; + user_sgpr_count += count_vs_user_sgprs(ctx); } } - user_sgpr_info->sgpr_count += 2; break; default: break; } if (needs_view_index) - user_sgpr_info->sgpr_count++; + user_sgpr_count++; if (ctx->shader_info->info.loads_push_constants) - user_sgpr_info->sgpr_count += 2; + user_sgpr_count += HAVE_32BIT_POINTERS ? 1 : 2; uint32_t available_sgprs = ctx->options->chip_class >= GFX9 ? 32 : 16; - uint32_t remaining_sgprs = available_sgprs - user_sgpr_info->sgpr_count; + uint32_t remaining_sgprs = available_sgprs - user_sgpr_count; + uint32_t num_desc_set = + util_bitcount(ctx->shader_info->info.desc_set_used_mask); - if (remaining_sgprs / 2 < util_bitcount(ctx->shader_info->info.desc_set_used_mask)) { - user_sgpr_info->sgpr_count += 2; + if (remaining_sgprs / (HAVE_32BIT_POINTERS ? 1 : 2) < num_desc_set) { user_sgpr_info->indirect_all_descriptor_sets = true; - } else { - user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2; } } @@ -517,7 +714,7 @@ declare_global_input_sgprs(struct radv_shader_context *ctx, struct arg_info *args, LLVMValueRef *desc_sets) { - LLVMTypeRef type = ac_array_in_const_addr_space(ctx->ac.i8); + LLVMTypeRef type = ac_array_in_const32_addr_space(ctx->ac.i8); unsigned num_sets = ctx->options->layout ? 
ctx->options->layout->num_sets : 0; unsigned stage_mask = 1 << stage; @@ -535,7 +732,7 @@ declare_global_input_sgprs(struct radv_shader_context *ctx, } } } else { - add_array_arg(args, ac_array_in_const_addr_space(type), desc_sets); + add_array_arg(args, ac_array_in_const32_addr_space(type), desc_sets); } if (ctx->shader_info->info.loads_push_constants) { @@ -555,7 +752,8 @@ declare_vs_specific_input_sgprs(struct radv_shader_context *ctx, (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { if (ctx->shader_info->info.vs.has_vertex_buffers) { - add_arg(args, ARG_SGPR, ac_array_in_const_addr_space(ctx->ac.v4i32), + add_arg(args, ARG_SGPR, + ac_array_in_const32_addr_space(ctx->ac.v4i32), &ctx->vertex_buffers); } add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.base_vertex); @@ -613,8 +811,8 @@ set_global_input_locs(struct radv_shader_context *ctx, gl_shader_stage stage, ctx->descriptor_sets[i] = NULL; } } else { - set_loc_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS, - user_sgpr_idx, 2); + set_loc_shader_ptr(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS, + user_sgpr_idx); for (unsigned i = 0; i < num_sets; ++i) { if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) && @@ -632,7 +830,7 @@ set_global_input_locs(struct radv_shader_context *ctx, gl_shader_stage stage, } if (ctx->shader_info->info.loads_push_constants) { - set_loc_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2); + set_loc_shader_ptr(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx); } } @@ -646,8 +844,8 @@ set_vs_specific_input_locs(struct radv_shader_context *ctx, (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { if (ctx->shader_info->info.vs.has_vertex_buffers) { - set_loc_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, - user_sgpr_idx, 2); + set_loc_shader_ptr(ctx, AC_UD_VS_VERTEX_BUFFERS, + user_sgpr_idx); } unsigned vs_num = 2; @@ -659,28 +857,6 @@ set_vs_specific_input_locs(struct radv_shader_context *ctx, } } -static unsigned shader_io_get_unique_index(gl_varying_slot slot) -{ - /* handle patch indices separate */ - if (slot == VARYING_SLOT_TESS_LEVEL_OUTER) - return 0; - if (slot == VARYING_SLOT_TESS_LEVEL_INNER) - return 1; - if (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_TESS_MAX) - return 2 + (slot - VARYING_SLOT_PATCH0); - - if (slot == VARYING_SLOT_POS) - return 0; - if (slot == VARYING_SLOT_PSIZ) - return 1; - if (slot == VARYING_SLOT_CLIP_DIST0) - return 2; - /* 3 is reserved for clip dist as well */ - if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31) - return 4 + (slot - VARYING_SLOT_VAR0); - unreachable("illegal slot in get unique index\n"); -} - static void set_llvm_calling_convention(LLVMValueRef func, gl_shader_stage stage) { @@ -695,7 +871,7 @@ static void set_llvm_calling_convention(LLVMValueRef func, calling_conv = RADEON_LLVM_AMDGPU_GS; break; case MESA_SHADER_TESS_CTRL: - calling_conv = HAVE_LLVM >= 0x0500 ? 
RADEON_LLVM_AMDGPU_HS : RADEON_LLVM_AMDGPU_VS; + calling_conv = RADEON_LLVM_AMDGPU_HS; break; case MESA_SHADER_FRAGMENT: calling_conv = RADEON_LLVM_AMDGPU_PS; @@ -765,9 +941,6 @@ static void create_function(struct radv_shader_context *ctx, if (ctx->options->key.vs.as_es) add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->es2gs_offset); - else if (ctx->options->key.vs.as_ls) - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->ls_out_layout); declare_vs_input_vgprs(ctx, &args); break; @@ -793,17 +966,6 @@ static void create_function(struct radv_shader_context *ctx, has_previous_stage, previous_stage, &args); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->ls_out_layout); - - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_offchip_layout); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_out_offsets); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_out_layout); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_in_layout); if (needs_view_index) add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->abi.view_index); @@ -821,14 +983,6 @@ static void create_function(struct radv_shader_context *ctx, &user_sgpr_info, &args, &desc_sets); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_offchip_layout); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_out_offsets); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_out_layout); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_in_layout); if (needs_view_index) add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->abi.view_index); @@ -847,7 +1001,6 @@ static void create_function(struct radv_shader_context *ctx, previous_stage, &user_sgpr_info, &args, &desc_sets); - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->tcs_offchip_layout); if (needs_view_index) add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->abi.view_index); @@ -882,20 +1035,13 @@ static void create_function(struct radv_shader_context *ctx, &user_sgpr_info, &args, &desc_sets); - if (previous_stage == MESA_SHADER_TESS_EVAL) { - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_offchip_layout); - } else { + if (previous_stage != MESA_SHADER_TESS_EVAL) { declare_vs_specific_input_sgprs(ctx, stage, has_previous_stage, previous_stage, &args); } - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->gsvs_ring_stride); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->gsvs_num_entries); if (needs_view_index) add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->abi.view_index); @@ -923,10 +1069,6 @@ static void create_function(struct radv_shader_context *ctx, &user_sgpr_info, &args, &desc_sets); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->gsvs_ring_stride); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->gsvs_num_entries); if (needs_view_index) add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->abi.view_index); @@ -956,10 +1098,6 @@ static void create_function(struct radv_shader_context *ctx, previous_stage, &user_sgpr_info, &args, &desc_sets); - if (ctx->shader_info->info.ps.needs_sample_positions) - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->sample_pos_offset); - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->abi.prim_mask); add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_sample); add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_center); @@ -984,8 +1122,7 @@ static void create_function(struct radv_shader_context *ctx, ctx->main_function = create_llvm_function( ctx->context, ctx->ac.module, ctx->ac.builder, NULL, 0, &args, - ctx->max_workgroup_size, - ctx->options->unsafe_math); + ctx->max_workgroup_size, ctx->options); set_llvm_calling_convention(ctx->main_function, stage); @@ -1002,8 +1139,8 @@ static void create_function(struct 
radv_shader_context *ctx, user_sgpr_idx = 0; if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets) { - set_loc_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS, - &user_sgpr_idx, 2); + set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS, + &user_sgpr_idx); if (ctx->options->supports_spill) { ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr", LLVMPointerType(ctx->ac.i8, AC_CONST_ADDR_SPACE), @@ -1033,23 +1170,14 @@ static void create_function(struct radv_shader_context *ctx, previous_stage, &user_sgpr_idx); if (ctx->abi.view_index) set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); - if (ctx->options->key.vs.as_ls) { - set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, - &user_sgpr_idx, 1); - } break; case MESA_SHADER_TESS_CTRL: set_vs_specific_input_locs(ctx, stage, has_previous_stage, previous_stage, &user_sgpr_idx); - if (has_previous_stage) - set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, - &user_sgpr_idx, 1); - set_loc_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, &user_sgpr_idx, 4); if (ctx->abi.view_index) set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); break; case MESA_SHADER_TESS_EVAL: - set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1); if (ctx->abi.view_index) set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); break; @@ -1060,20 +1188,11 @@ static void create_function(struct radv_shader_context *ctx, has_previous_stage, previous_stage, &user_sgpr_idx); - else - set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, - &user_sgpr_idx, 1); } - set_loc_shader(ctx, AC_UD_GS_VS_RING_STRIDE_ENTRIES, - &user_sgpr_idx, 2); if (ctx->abi.view_index) set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); break; case MESA_SHADER_FRAGMENT: - if (ctx->shader_info->info.ps.needs_sample_positions) { - set_loc_shader(ctx, AC_UD_PS_SAMPLE_POS_OFFSET, - &user_sgpr_idx, 1); - } break; default: unreachable("Shader stage not implemented"); @@ -1141,30 +1260,50 @@ radv_load_resource(struct ac_shader_abi *abi, LLVMValueRef index, * * Note that every attribute has 4 components. 
*/ +static LLVMValueRef get_non_vertex_index_offset(struct radv_shader_context *ctx) +{ + uint32_t num_patches = ctx->tcs_num_patches; + uint32_t num_tcs_outputs; + if (ctx->stage == MESA_SHADER_TESS_CTRL) + num_tcs_outputs = util_last_bit64(ctx->shader_info->info.tcs.outputs_written); + else + num_tcs_outputs = ctx->options->key.tes.tcs_num_outputs; + + uint32_t output_vertex_size = num_tcs_outputs * 16; + uint32_t pervertex_output_patch_size = ctx->tcs_vertices_per_patch * output_vertex_size; + + return LLVMConstInt(ctx->ac.i32, pervertex_output_patch_size * num_patches, false); +} + +static LLVMValueRef calc_param_stride(struct radv_shader_context *ctx, + LLVMValueRef vertex_index) +{ + LLVMValueRef param_stride; + if (vertex_index) + param_stride = LLVMConstInt(ctx->ac.i32, ctx->tcs_vertices_per_patch * ctx->tcs_num_patches, false); + else + param_stride = LLVMConstInt(ctx->ac.i32, ctx->tcs_num_patches, false); + return param_stride; +} + static LLVMValueRef get_tcs_tes_buffer_address(struct radv_shader_context *ctx, LLVMValueRef vertex_index, LLVMValueRef param_index) { - LLVMValueRef base_addr, vertices_per_patch, num_patches; + LLVMValueRef base_addr; LLVMValueRef param_stride, constant16; LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); - - vertices_per_patch = LLVMConstInt(ctx->ac.i32, ctx->tcs_vertices_per_patch, false); - num_patches = ac_unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 0, 9); - + LLVMValueRef vertices_per_patch = LLVMConstInt(ctx->ac.i32, ctx->tcs_vertices_per_patch, false); constant16 = LLVMConstInt(ctx->ac.i32, 16, false); + param_stride = calc_param_stride(ctx, vertex_index); if (vertex_index) { base_addr = LLVMBuildMul(ctx->ac.builder, rel_patch_id, vertices_per_patch, ""); base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, vertex_index, ""); - - param_stride = LLVMBuildMul(ctx->ac.builder, vertices_per_patch, - num_patches, ""); } else { base_addr = rel_patch_id; - param_stride = num_patches; } base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, @@ -1174,8 +1313,7 @@ static LLVMValueRef get_tcs_tes_buffer_address(struct radv_shader_context *ctx, base_addr = LLVMBuildMul(ctx->ac.builder, base_addr, constant16, ""); if (!vertex_index) { - LLVMValueRef patch_data_offset = - ac_unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 16, 16); + LLVMValueRef patch_data_offset = get_non_vertex_index_offset(ctx); base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, patch_data_offset, ""); @@ -1203,18 +1341,6 @@ static LLVMValueRef get_tcs_tes_buffer_address_params(struct radv_shader_context return get_tcs_tes_buffer_address(ctx, vertex_index, param_index); } -static void -mark_tess_output(struct radv_shader_context *ctx, - bool is_patch, uint32_t param, int num_slots) - -{ - uint64_t slot_mask = (1ull << num_slots) - 1; - if (is_patch) { - ctx->tess_patch_outputs_written |= (slot_mask << param); - } else - ctx->tess_outputs_written |= (slot_mask << param); -} - static LLVMValueRef get_dw_address(struct radv_shader_context *ctx, LLVMValueRef dw_addr, @@ -1271,7 +1397,8 @@ load_tcs_varyings(struct ac_shader_abi *abi, unsigned param = shader_io_get_unique_index(location); if (load_input) { - stride = ac_unpack_param(&ctx->ac, ctx->tcs_in_layout, 13, 8); + uint32_t input_vertex_size = (ctx->tcs_num_inputs * 16) / 4; + stride = LLVMConstInt(ctx->ac.i32, input_vertex_size, false); dw_addr = get_tcs_in_current_patch_offset(ctx); } else { if (!is_patch) { @@ -1309,7 +1436,6 @@ store_tcs_output(struct ac_shader_abi *abi, const unsigned component = var->data.location_frac; 
const bool is_patch = var->data.patch; const bool is_compact = var->data.compact; - const unsigned count = glsl_count_attribute_slots(var->type, false); LLVMValueRef dw_addr; LLVMValueRef stride = NULL; LLVMValueRef buf_addr = NULL; @@ -1338,11 +1464,6 @@ store_tcs_output(struct ac_shader_abi *abi, dw_addr = get_tcs_out_current_patch_data_offset(ctx); } - if (param_index) - mark_tess_output(ctx, is_patch, param, count); - else - mark_tess_output(ctx, is_patch, param, 1); - dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride, param_index); buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, is_compact, @@ -1501,6 +1622,30 @@ static LLVMValueRef lookup_interp_param(struct ac_shader_abi *abi, return NULL; } +static uint32_t +radv_get_sample_pos_offset(uint32_t num_samples) +{ + uint32_t sample_pos_offset = 0; + + switch (num_samples) { + case 2: + sample_pos_offset = 1; + break; + case 4: + sample_pos_offset = 3; + break; + case 8: + sample_pos_offset = 7; + break; + case 16: + sample_pos_offset = 15; + break; + default: + break; + } + return sample_pos_offset; +} + static LLVMValueRef load_sample_position(struct ac_shader_abi *abi, LLVMValueRef sample_id) { @@ -1512,7 +1657,12 @@ static LLVMValueRef load_sample_position(struct ac_shader_abi *abi, ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ac_array_in_const_addr_space(ctx->ac.v2f32), ""); - sample_id = LLVMBuildAdd(ctx->ac.builder, sample_id, ctx->sample_pos_offset, ""); + uint32_t sample_pos_offset = + radv_get_sample_pos_offset(ctx->options->key.fs.num_samples); + + sample_id = + LLVMBuildAdd(ctx->ac.builder, sample_id, + LLVMConstInt(ctx->ac.i32, sample_pos_offset, false), ""); result = ac_build_load_invariant(&ctx->ac, ptr, sample_id); return result; @@ -1522,9 +1672,14 @@ static LLVMValueRef load_sample_position(struct ac_shader_abi *abi, static LLVMValueRef load_sample_mask_in(struct ac_shader_abi *abi) { struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - uint8_t log2_ps_iter_samples = ctx->shader_info->info.ps.force_persample ? - ctx->options->key.fs.log2_num_samples : - ctx->options->key.fs.log2_ps_iter_samples; + uint8_t log2_ps_iter_samples; + + if (ctx->shader_info->info.ps.force_persample) { + log2_ps_iter_samples = + util_logbase2(ctx->options->key.fs.num_samples); + } else { + log2_ps_iter_samples = ctx->options->key.fs.log2_ps_iter_samples; + } /* The bit pattern matches that used by fixed function fragment * processing. 
*/ @@ -1574,6 +1729,8 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addr /* loop num outputs */ idx = 0; for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { + unsigned output_usage_mask = + ctx->shader_info->info.gs.output_usage_mask[i]; LLVMValueRef *out_ptr = &addrs[i * 4]; int length = 4; int slot = idx; @@ -1587,8 +1744,13 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addr length = ctx->num_output_clips + ctx->num_output_culls; if (length > 4) slot_inc = 2; + output_usage_mask = (1 << length) - 1; } + for (unsigned j = 0; j < length; j++) { + if (!(output_usage_mask & (1 << j))) + continue; + LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], ""); LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, (slot * 4 + j) * ctx->gs_max_out_vertices, false); @@ -1684,7 +1846,8 @@ static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi, unsigned constant_index, LLVMValueRef index, enum ac_descriptor_type desc_type, - bool image, bool write) + bool image, bool write, + bool bindless) { struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); LLVMValueRef list = ctx->descriptor_sets[descriptor_set]; @@ -1749,11 +1912,53 @@ static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi, index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), ""); list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, offset, 0)); - list = LLVMBuildPointerCast(builder, list, ac_array_in_const_addr_space(type), ""); + list = LLVMBuildPointerCast(builder, list, + ac_array_in_const32_addr_space(type), ""); return ac_build_load_to_sgpr(&ctx->ac, list, index); } +/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW. + * so we may need to fix it up. */ +static LLVMValueRef +adjust_vertex_fetch_alpha(struct radv_shader_context *ctx, + unsigned adjustment, + LLVMValueRef alpha) +{ + if (adjustment == RADV_ALPHA_ADJUST_NONE) + return alpha; + + LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0); + + if (adjustment == RADV_ALPHA_ADJUST_SSCALED) + alpha = LLVMBuildFPToUI(ctx->ac.builder, alpha, ctx->ac.i32, ""); + else + alpha = ac_to_integer(&ctx->ac, alpha); + + /* For the integer-like cases, do a natural sign extension. + * + * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0 + * and happen to contain 0, 1, 2, 3 as the two LSBs of the + * exponent. + */ + alpha = LLVMBuildShl(ctx->ac.builder, alpha, + adjustment == RADV_ALPHA_ADJUST_SNORM ? + LLVMConstInt(ctx->ac.i32, 7, 0) : c30, ""); + alpha = LLVMBuildAShr(ctx->ac.builder, alpha, c30, ""); + + /* Convert back to the right type. 
*/ + if (adjustment == RADV_ALPHA_ADJUST_SNORM) { + LLVMValueRef clamp; + LLVMValueRef neg_one = LLVMConstReal(ctx->ac.f32, -1.0); + alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, ""); + clamp = LLVMBuildFCmp(ctx->ac.builder, LLVMRealULT, alpha, neg_one, ""); + alpha = LLVMBuildSelect(ctx->ac.builder, clamp, neg_one, alpha, ""); + } else if (adjustment == RADV_ALPHA_ADJUST_SSCALED) { + alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, ""); + } + + return alpha; +} static void handle_vs_input_decl(struct radv_shader_context *ctx, @@ -1764,30 +1969,43 @@ handle_vs_input_decl(struct radv_shader_context *ctx, LLVMValueRef t_list; LLVMValueRef input; LLVMValueRef buffer_index; - int index = variable->data.location - VERT_ATTRIB_GENERIC0; - int idx = variable->data.location; unsigned attrib_count = glsl_count_attribute_slots(variable->type, true); uint8_t input_usage_mask = ctx->shader_info->info.vs.input_usage_mask[variable->data.location]; unsigned num_channels = util_last_bit(input_usage_mask); - variable->data.driver_location = idx * 4; + variable->data.driver_location = variable->data.location * 4; + + for (unsigned i = 0; i < attrib_count; ++i) { + LLVMValueRef output[4]; + unsigned attrib_index = variable->data.location + i - VERT_ATTRIB_GENERIC0; + + if (ctx->options->key.vs.instance_rate_inputs & (1u << attrib_index)) { + uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[attrib_index]; + + if (divisor) { + buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.instance_id, + ctx->abi.start_instance, ""); + + if (divisor != 1) { + buffer_index = LLVMBuildUDiv(ctx->ac.builder, buffer_index, + LLVMConstInt(ctx->ac.i32, divisor, 0), ""); + } - for (unsigned i = 0; i < attrib_count; ++i, ++idx) { - if (ctx->options->key.vs.instance_rate_inputs & (1u << (index + i))) { - buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.instance_id, - ctx->abi.start_instance, ""); - if (ctx->options->key.vs.as_ls) { - ctx->shader_info->vs.vgpr_comp_cnt = - MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt); + if (ctx->options->key.vs.as_ls) { + ctx->shader_info->vs.vgpr_comp_cnt = + MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt); + } else { + ctx->shader_info->vs.vgpr_comp_cnt = + MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt); + } } else { - ctx->shader_info->vs.vgpr_comp_cnt = - MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt); + buffer_index = ctx->ac.i32_0; } } else buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id, ctx->abi.base_vertex, ""); - t_offset = LLVMConstInt(ctx->ac.i32, index + i, false); + t_offset = LLVMConstInt(ctx->ac.i32, attrib_index, false); t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset); @@ -1800,9 +2018,15 @@ handle_vs_input_decl(struct radv_shader_context *ctx, for (unsigned chan = 0; chan < 4; chan++) { LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); - ctx->inputs[ac_llvm_reg_index_soa(idx, chan)] = - ac_to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, - input, llvm_chan, "")); + output[chan] = LLVMBuildExtractElement(ctx->ac.builder, input, llvm_chan, ""); + } + + unsigned alpha_adjust = (ctx->options->key.vs.alpha_adjust >> (attrib_index * 2)) & 3; + output[3] = adjust_vertex_fetch_alpha(ctx, alpha_adjust, output[3]); + + for (unsigned chan = 0; chan < 4; chan++) { + ctx->inputs[ac_llvm_reg_index_soa(variable->data.location + i, chan)] = + ac_to_integer(&ctx->ac, output[chan]); } } } @@ -1898,9 +2122,6 @@ static void prepare_interp_optimize(struct radv_shader_context *ctx, struct nir_shader *nir) { - 
if (!ctx->options->key.fs.multisample) - return; - bool uses_center = false; bool uses_centroid = false; nir_foreach_variable(variable, &nir->inputs) { @@ -2168,7 +2389,7 @@ radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan) static void handle_vs_outputs_post(struct radv_shader_context *ctx, - bool export_prim_id, + bool export_prim_id, bool export_layer_id, struct radv_vs_output_info *outinfo) { uint32_t param_count = 0; @@ -2322,10 +2543,9 @@ handle_vs_outputs_post(struct radv_shader_context *ctx, output_usage_mask = ctx->shader_info->info.tes.output_usage_mask[i]; } else { - /* Enable all channels for the GS copy shader because - * we don't know the output usage mask currently. - */ - output_usage_mask = 0xf; + assert(ctx->is_gs_copy_shader); + output_usage_mask = + ctx->shader_info->info.gs.output_usage_mask[i]; } radv_export_param(ctx, param_count, values, output_usage_mask); @@ -2342,12 +2562,24 @@ handle_vs_outputs_post(struct radv_shader_context *ctx, for (unsigned j = 1; j < 4; j++) values[j] = ctx->ac.f32_0; - radv_export_param(ctx, param_count, values, 0xf); + radv_export_param(ctx, param_count, values, 0x1); outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count++; outinfo->export_prim_id = true; } + if (export_layer_id && layer_value) { + LLVMValueRef values[4]; + + values[0] = layer_value; + for (unsigned j = 1; j < 4; j++) + values[j] = ctx->ac.f32_0; + + radv_export_param(ctx, param_count, values, 0x1); + + outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = param_count++; + } + outinfo->pos_exports = num_pos_exports; outinfo->param_exports = param_count; } @@ -2393,14 +2625,26 @@ handle_es_outputs_post(struct radv_shader_context *ctx, for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { LLVMValueRef dw_addr = NULL; LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4]; + unsigned output_usage_mask; int param_index; int length = 4; if (!(ctx->output_mask & (1ull << i))) continue; - if (i == VARYING_SLOT_CLIP_DIST0) + if (ctx->stage == MESA_SHADER_VERTEX) { + output_usage_mask = + ctx->shader_info->info.vs.output_usage_mask[i]; + } else { + assert(ctx->stage == MESA_SHADER_TESS_EVAL); + output_usage_mask = + ctx->shader_info->info.tes.output_usage_mask[i]; + } + + if (i == VARYING_SLOT_CLIP_DIST0) { length = ctx->num_output_clips + ctx->num_output_culls; + output_usage_mask = (1 << length) - 1; + } param_index = shader_io_get_unique_index(i); @@ -2409,14 +2653,22 @@ handle_es_outputs_post(struct radv_shader_context *ctx, LLVMConstInt(ctx->ac.i32, param_index * 4, false), ""); } + for (j = 0; j < length; j++) { + if (!(output_usage_mask & (1 << j))) + continue; + LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], ""); out_val = LLVMBuildBitCast(ctx->ac.builder, out_val, ctx->ac.i32, ""); if (ctx->ac.chip_class >= GFX9) { - ac_lds_store(&ctx->ac, dw_addr, + LLVMValueRef dw_addr_offset = + LLVMBuildAdd(ctx->ac.builder, dw_addr, + LLVMConstInt(ctx->ac.i32, + j, false), ""); + + ac_lds_store(&ctx->ac, dw_addr_offset, LLVMBuildLoad(ctx->ac.builder, out_ptr[j], "")); - dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, ctx->ac.i32_1, ""); } else { ac_build_buffer_store_dword(&ctx->ac, ctx->esgs_ring, @@ -2433,7 +2685,8 @@ static void handle_ls_outputs_post(struct radv_shader_context *ctx) { LLVMValueRef vertex_id = ctx->rel_auto_id; - LLVMValueRef vertex_dw_stride = ac_unpack_param(&ctx->ac, ctx->ls_out_layout, 13, 8); + uint32_t num_tcs_inputs = util_last_bit64(ctx->shader_info->info.vs.ls_outputs_written); + LLVMValueRef 
vertex_dw_stride = LLVMConstInt(ctx->ac.i32, num_tcs_inputs * 4, false); LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id, vertex_dw_stride, ""); @@ -2447,9 +2700,6 @@ handle_ls_outputs_post(struct radv_shader_context *ctx) if (i == VARYING_SLOT_CLIP_DIST0) length = ctx->num_output_clips + ctx->num_output_culls; int param = shader_io_get_unique_index(i); - mark_tess_output(ctx, false, param, 1); - if (length > 4) - mark_tess_output(ctx, false, param + 1, 1); LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr, LLVMConstInt(ctx->ac.i32, param * 4, false), ""); @@ -2461,97 +2711,6 @@ handle_ls_outputs_post(struct radv_shader_context *ctx) } } -struct ac_build_if_state -{ - struct radv_shader_context *ctx; - LLVMValueRef condition; - LLVMBasicBlockRef entry_block; - LLVMBasicBlockRef true_block; - LLVMBasicBlockRef false_block; - LLVMBasicBlockRef merge_block; -}; - -static LLVMBasicBlockRef -ac_build_insert_new_block(struct radv_shader_context *ctx, const char *name) -{ - LLVMBasicBlockRef current_block; - LLVMBasicBlockRef next_block; - LLVMBasicBlockRef new_block; - - /* get current basic block */ - current_block = LLVMGetInsertBlock(ctx->ac.builder); - - /* chqeck if there's another block after this one */ - next_block = LLVMGetNextBasicBlock(current_block); - if (next_block) { - /* insert the new block before the next block */ - new_block = LLVMInsertBasicBlockInContext(ctx->context, next_block, name); - } - else { - /* append new block after current block */ - LLVMValueRef function = LLVMGetBasicBlockParent(current_block); - new_block = LLVMAppendBasicBlockInContext(ctx->context, function, name); - } - return new_block; -} - -static void -ac_nir_build_if(struct ac_build_if_state *ifthen, - struct radv_shader_context *ctx, - LLVMValueRef condition) -{ - LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->ac.builder); - - memset(ifthen, 0, sizeof *ifthen); - ifthen->ctx = ctx; - ifthen->condition = condition; - ifthen->entry_block = block; - - /* create endif/merge basic block for the phi functions */ - ifthen->merge_block = ac_build_insert_new_block(ctx, "endif-block"); - - /* create/insert true_block before merge_block */ - ifthen->true_block = - LLVMInsertBasicBlockInContext(ctx->context, - ifthen->merge_block, - "if-true-block"); - - /* successive code goes into the true block */ - LLVMPositionBuilderAtEnd(ctx->ac.builder, ifthen->true_block); -} - -/** - * End a conditional. - */ -static void -ac_nir_build_endif(struct ac_build_if_state *ifthen) -{ - LLVMBuilderRef builder = ifthen->ctx->ac.builder; - - /* Insert branch to the merge block from current block */ - LLVMBuildBr(builder, ifthen->merge_block); - - /* - * Now patch in the various branch instructions. 
- */ - - /* Insert the conditional branch instruction at the end of entry_block */ - LLVMPositionBuilderAtEnd(builder, ifthen->entry_block); - if (ifthen->false_block) { - /* we have an else clause */ - LLVMBuildCondBr(builder, ifthen->condition, - ifthen->true_block, ifthen->false_block); - } - else { - /* no else clause */ - LLVMBuildCondBr(builder, ifthen->condition, - ifthen->true_block, ifthen->merge_block); - } - - /* Resume building code at end of the ifthen->merge_block */ - LLVMPositionBuilderAtEnd(builder, ifthen->merge_block); -} - static void write_tess_factors(struct radv_shader_context *ctx) { @@ -2593,13 +2752,11 @@ write_tess_factors(struct radv_shader_context *ctx) if (inner_comps) { tess_inner_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER); - mark_tess_output(ctx, true, tess_inner_index, 1); lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base, LLVMConstInt(ctx->ac.i32, tess_inner_index * 4, false), ""); } tess_outer_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER); - mark_tess_output(ctx, true, tess_outer_index, 1); lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base, LLVMConstInt(ctx->ac.i32, tess_outer_index * 4, false), ""); @@ -2608,7 +2765,7 @@ write_tess_factors(struct radv_shader_context *ctx) outer[i] = LLVMGetUndef(ctx->ac.i32); } - // LINES reverseal + // LINES reversal if (ctx->options->key.tcs.primitive_mode == GL_ISOLINES) { outer[0] = out[1] = ac_lds_load(&ctx->ac, lds_outer); lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_outer, @@ -2814,6 +2971,7 @@ handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs, handle_es_outputs_post(ctx, &ctx->shader_info->vs.es_info); else handle_vs_outputs_post(ctx, ctx->options->key.vs.export_prim_id, + ctx->options->key.vs.export_layer_id, &ctx->shader_info->vs.outinfo); break; case MESA_SHADER_FRAGMENT: @@ -2830,6 +2988,7 @@ handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs, handle_es_outputs_post(ctx, &ctx->shader_info->tes.es_info); else handle_vs_outputs_post(ctx, ctx->options->key.tes.export_prim_id, + ctx->options->key.tes.export_layer_id, &ctx->shader_info->tes.outinfo); break; default: @@ -2837,30 +2996,12 @@ handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs, } } -static void ac_llvm_finalize_module(struct radv_shader_context *ctx) +static void ac_llvm_finalize_module(struct radv_shader_context *ctx, + LLVMPassManagerRef passmgr, + const struct radv_nir_compiler_options *options) { - LLVMPassManagerRef passmgr; - /* Create the pass manager */ - passmgr = LLVMCreateFunctionPassManagerForModule( - ctx->ac.module); - - /* This pass should eliminate all the load and store instructions */ - LLVMAddPromoteMemoryToRegisterPass(passmgr); - - /* Add some optimization passes */ - LLVMAddScalarReplAggregatesPass(passmgr); - LLVMAddLICMPass(passmgr); - LLVMAddAggressiveDCEPass(passmgr); - LLVMAddCFGSimplificationPass(passmgr); - LLVMAddInstructionCombiningPass(passmgr); - - /* Run the pass */ - LLVMInitializeFunctionPassManager(passmgr); - LLVMRunFunctionPassManager(passmgr, ctx->main_function); - LLVMFinalizeFunctionPassManager(passmgr); - + LLVMRunPassManager(passmgr, ctx->ac.module); LLVMDisposeBuilder(ctx->ac.builder); - LLVMDisposePassManager(passmgr); ac_llvm_context_dispose(&ctx->ac); } @@ -2901,9 +3042,16 @@ ac_nir_eliminate_const_vs_outputs(struct radv_shader_context *ctx) static void ac_setup_rings(struct radv_shader_context *ctx) { - if ((ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) || - 
(ctx->stage == MESA_SHADER_TESS_EVAL && ctx->options->key.tes.as_es)) { - ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_VS, false)); + if (ctx->options->chip_class <= VI && + (ctx->stage == MESA_SHADER_GEOMETRY || + ctx->options->key.vs.as_es || ctx->options->key.tes.as_es)) { + unsigned ring = ctx->stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS + : RING_ESGS_VS; + LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, ring, false); + + ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, + ctx->ring_offsets, + offset); } if (ctx->is_gs_copy_shader) { @@ -2911,14 +3059,19 @@ ac_setup_rings(struct radv_shader_context *ctx) } if (ctx->stage == MESA_SHADER_GEOMETRY) { LLVMValueRef tmp; - ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_GS, false)); + uint32_t num_entries = 64; + LLVMValueRef gsvs_ring_stride = LLVMConstInt(ctx->ac.i32, ctx->max_gsvs_emit_size, false); + LLVMValueRef gsvs_ring_desc = LLVMConstInt(ctx->ac.i32, ctx->max_gsvs_emit_size << 16, false); ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_GSVS_GS, false)); ctx->gsvs_ring = LLVMBuildBitCast(ctx->ac.builder, ctx->gsvs_ring, ctx->ac.v4i32, ""); - ctx->gsvs_ring = LLVMBuildInsertElement(ctx->ac.builder, ctx->gsvs_ring, ctx->gsvs_num_entries, LLVMConstInt(ctx->ac.i32, 2, false), ""); + tmp = LLVMConstInt(ctx->ac.i32, num_entries, false); + if (ctx->options->chip_class >= VI) + tmp = LLVMBuildMul(ctx->ac.builder, gsvs_ring_stride, tmp, ""); + ctx->gsvs_ring = LLVMBuildInsertElement(ctx->ac.builder, ctx->gsvs_ring, tmp, LLVMConstInt(ctx->ac.i32, 2, false), ""); tmp = LLVMBuildExtractElement(ctx->ac.builder, ctx->gsvs_ring, ctx->ac.i32_1, ""); - tmp = LLVMBuildOr(ctx->ac.builder, tmp, ctx->gsvs_ring_stride, ""); + tmp = LLVMBuildOr(ctx->ac.builder, tmp, gsvs_ring_desc, ""); ctx->gsvs_ring = LLVMBuildInsertElement(ctx->ac.builder, ctx->gsvs_ring, tmp, ctx->ac.i32_1, ""); } @@ -2959,7 +3112,6 @@ static void ac_nir_fixup_ls_hs_input_vgprs(struct radv_shader_context *ctx) LLVMValueRef hs_empty = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, count, ctx->ac.i32_0, ""); ctx->abi.instance_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->rel_auto_id, ctx->abi.instance_id, ""); - ctx->vs_prim_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.vertex_id, ctx->vs_prim_id, ""); ctx->rel_auto_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_rel_ids, ctx->rel_auto_id, ""); ctx->abi.vertex_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_patch_id, ctx->abi.vertex_id, ""); } @@ -2979,29 +3131,20 @@ static void prepare_gs_input_vgprs(struct radv_shader_context *ctx) static -LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, +LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *const *shaders, int shader_count, struct radv_shader_variant_info *shader_info, - const struct radv_nir_compiler_options *options, - bool dump_shader) + const struct radv_nir_compiler_options *options) { struct radv_shader_context ctx = {0}; unsigned i; ctx.options = options; ctx.shader_info = shader_info; - ctx.context = LLVMContextCreate(); - - ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class, - options->family); - ctx.ac.module = LLVMModuleCreateWithNameInContext("shader", ctx.context); - LLVMSetTarget(ctx.ac.module, options->supports_spill ? 
"amdgcn-mesa-mesa3d" : "amdgcn--"); - LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm); - char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout); - LLVMSetDataLayout(ctx.ac.module, data_layout_str); - LLVMDisposeTargetData(data_layout); - LLVMDisposeMessage(data_layout_str); + ac_llvm_context_init(&ctx.ac, options->chip_class, options->family); + ctx.context = ctx.ac.context; + ctx.ac.module = ac_create_module(ac_llvm->tm, ctx.context); enum ac_float_mode float_mode = options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH : @@ -3037,6 +3180,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, ctx.abi.load_sampler_desc = radv_get_sampler_desc; ctx.abi.load_resource = radv_load_resource; ctx.abi.clamp_shadow_reference = false; + ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9; if (shader_count >= 2) ac_init_exec_full_mask(&ctx.ac); @@ -3048,7 +3192,6 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, for(int i = 0; i < shader_count; ++i) { ctx.stage = shaders[i]->info.stage; ctx.output_mask = 0; - ctx.tess_outputs_written = 0; ctx.num_output_clips = shaders[i]->info.clip_distance_array_size; ctx.num_output_culls = shaders[i]->info.cull_distance_array_size; @@ -3064,12 +3207,18 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, ctx.abi.load_patch_vertices_in = load_patch_vertices_in; ctx.abi.store_tcs_outputs = store_tcs_output; ctx.tcs_vertices_per_patch = shaders[i]->info.tess.tcs_vertices_out; + if (shader_count == 1) + ctx.tcs_num_inputs = ctx.options->key.tcs.num_inputs; + else + ctx.tcs_num_inputs = util_last_bit64(shader_info->info.vs.ls_outputs_written); + ctx.tcs_num_patches = get_tcs_num_patches(&ctx); } else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) { ctx.tes_primitive_mode = shaders[i]->info.tess.primitive_mode; ctx.abi.load_tess_varyings = load_tes_input; ctx.abi.load_tess_coord = load_tess_coord; ctx.abi.load_patch_vertices_in = load_patch_vertices_in; ctx.tcs_vertices_per_patch = shaders[i]->info.tess.tcs_vertices_out; + ctx.tcs_num_patches = ctx.options->key.tes.num_patches; } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) { if (shader_info->info.vs.needs_instance_id) { if (ctx.options->key.vs.as_ls) { @@ -3092,6 +3241,17 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, if (i) ac_emit_barrier(&ctx.ac, ctx.stage); + nir_foreach_variable(variable, &shaders[i]->outputs) + scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage); + + if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) { + unsigned addclip = shaders[i]->info.clip_distance_array_size + + shaders[i]->info.cull_distance_array_size > 4; + ctx.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16; + ctx.max_gsvs_emit_size = ctx.gsvs_vertex_size * + shaders[i]->info.gs.vertices_out; + } + ac_setup_rings(&ctx); LLVMBasicBlockRef merge_block; @@ -3118,9 +3278,6 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, else if(shader_count >= 2 && shaders[i]->info.stage == MESA_SHADER_GEOMETRY) prepare_gs_input_vgprs(&ctx); - nir_foreach_variable(variable, &shaders[i]->outputs) - scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage); - ac_nir_translate(&ctx.ac, &ctx.abi, shaders[i]); if (shader_count >= 2) { @@ -3129,16 +3286,11 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, } if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) { - unsigned addclip = shaders[i]->info.clip_distance_array_size + - 
shaders[i]->info.cull_distance_array_size > 4; - shader_info->gs.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16; - shader_info->gs.max_gsvs_emit_size = shader_info->gs.gsvs_vertex_size * - shaders[i]->info.gs.vertices_out; + shader_info->gs.gsvs_vertex_size = ctx.gsvs_vertex_size; + shader_info->gs.max_gsvs_emit_size = ctx.max_gsvs_emit_size; } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) { - shader_info->tcs.outputs_written = ctx.tess_outputs_written; - shader_info->tcs.patch_outputs_written = ctx.tess_patch_outputs_written; - } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX && ctx.options->key.vs.as_ls) { - shader_info->vs.outputs_written = ctx.tess_outputs_written; + shader_info->tcs.num_patches = ctx.tcs_num_patches; + shader_info->tcs.lds_size = calculate_tess_lds_size(&ctx); } } @@ -3147,12 +3299,12 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, if (options->dump_preoptir) ac_dump_module(ctx.ac.module); - ac_llvm_finalize_module(&ctx); + ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, options); if (shader_count == 1) ac_nir_eliminate_const_vs_outputs(&ctx); - if (dump_shader) { + if (options->dump_shader) { ctx.shader_info->private_mem_vgprs = ac_count_scratch_private_memory(ctx.main_function); } @@ -3177,15 +3329,10 @@ static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context) static unsigned ac_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary, - LLVMTargetMachineRef tm) + struct ac_llvm_compiler *ac_llvm) { unsigned retval = 0; - char *err; LLVMContextRef llvm_ctx; - LLVMMemoryBufferRef out_buffer; - unsigned buffer_size; - const char *buffer_data; - LLVMBool mem_err; /* Setup Diagnostic Handler*/ llvm_ctx = LLVMGetModuleContext(M); @@ -3194,51 +3341,39 @@ static unsigned ac_llvm_compile(LLVMModuleRef M, &retval); /* Compile IR*/ - mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, - &err, &out_buffer); - - /* Process Errors/Warnings */ - if (mem_err) { - fprintf(stderr, "%s: %s", __FUNCTION__, err); - free(err); + if (!radv_compile_to_binary(ac_llvm, M, binary)) retval = 1; - goto out; - } - - /* Extract Shader Code*/ - buffer_size = LLVMGetBufferSize(out_buffer); - buffer_data = LLVMGetBufferStart(out_buffer); - - ac_elf_read(buffer_data, buffer_size, binary); - - /* Clean up */ - LLVMDisposeMemoryBuffer(out_buffer); - -out: return retval; } -static void ac_compile_llvm_module(LLVMTargetMachineRef tm, +static void ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, LLVMModuleRef llvm_module, struct ac_shader_binary *binary, struct ac_shader_config *config, struct radv_shader_variant_info *shader_info, gl_shader_stage stage, - bool dump_shader, bool supports_spill) + const struct radv_nir_compiler_options *options) { - if (dump_shader) + if (options->dump_shader) ac_dump_module(llvm_module); memset(binary, 0, sizeof(*binary)); - int v = ac_llvm_compile(llvm_module, binary, tm); + + if (options->record_llvm_ir) { + char *llvm_ir = LLVMPrintModuleToString(llvm_module); + binary->llvm_ir_string = strdup(llvm_ir); + LLVMDisposeMessage(llvm_ir); + } + + int v = ac_llvm_compile(llvm_module, binary, ac_llvm); if (v) { fprintf(stderr, "compile failed\n"); } - if (dump_shader) + if (options->dump_shader) fprintf(stderr, "disasm:\n%s\n", binary->disasm_string); - ac_shader_binary_read_config(binary, config, 0, supports_spill); + ac_shader_binary_read_config(binary, config, 0, options->supports_spill); LLVMContextRef ctx = LLVMGetModuleContext(llvm_module); 
LLVMDisposeModule(llvm_module); @@ -3340,20 +3475,23 @@ ac_fill_shader_info(struct radv_shader_variant_info *shader_info, struct nir_sha } void -radv_compile_nir_shader(LLVMTargetMachineRef tm, +radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, struct ac_shader_binary *binary, struct ac_shader_config *config, struct radv_shader_variant_info *shader_info, struct nir_shader *const *nir, int nir_count, - const struct radv_nir_compiler_options *options, - bool dump_shader) + const struct radv_nir_compiler_options *options) { - LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, nir_count, shader_info, - options, dump_shader); + LLVMModuleRef llvm_module; + + llvm_module = ac_translate_nir_to_llvm(ac_llvm, nir, nir_count, shader_info, + options); + + ac_compile_llvm_module(ac_llvm, llvm_module, binary, config, shader_info, + nir[0]->info.stage, options); - ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir[0]->info.stage, dump_shader, options->supports_spill); for (int i = 0; i < nir_count; ++i) ac_fill_shader_info(shader_info, nir[i], options); @@ -3405,29 +3543,26 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx) } idx += slot_inc; } - handle_vs_outputs_post(ctx, false, &ctx->shader_info->vs.outinfo); + handle_vs_outputs_post(ctx, false, false, &ctx->shader_info->vs.outinfo); } void -radv_compile_gs_copy_shader(LLVMTargetMachineRef tm, +radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm, struct nir_shader *geom_shader, struct ac_shader_binary *binary, struct ac_shader_config *config, struct radv_shader_variant_info *shader_info, - const struct radv_nir_compiler_options *options, - bool dump_shader) + const struct radv_nir_compiler_options *options) { struct radv_shader_context ctx = {0}; - ctx.context = LLVMContextCreate(); ctx.options = options; ctx.shader_info = shader_info; - ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class, - options->family); - ctx.ac.module = LLVMModuleCreateWithNameInContext("shader", ctx.context); + ac_llvm_context_init(&ctx.ac, options->chip_class, options->family); + ctx.context = ctx.ac.context; + ctx.ac.module = ac_create_module(ac_llvm->tm, ctx.context); ctx.is_gs_copy_shader = true; - LLVMSetTarget(ctx.ac.module, "amdgcn--"); enum ac_float_mode float_mode = options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH : @@ -3436,6 +3571,8 @@ radv_compile_gs_copy_shader(LLVMTargetMachineRef tm, ctx.ac.builder = ac_create_builder(ctx.context, float_mode); ctx.stage = MESA_SHADER_VERTEX; + radv_nir_shader_info_pass(geom_shader, options, &shader_info->info); + create_function(&ctx, MESA_SHADER_VERTEX, false, MESA_SHADER_VERTEX); ctx.gs_max_out_vertices = geom_shader->info.gs.vertices_out; @@ -3454,9 +3591,8 @@ radv_compile_gs_copy_shader(LLVMTargetMachineRef tm, LLVMBuildRetVoid(ctx.ac.builder); - ac_llvm_finalize_module(&ctx); + ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, options); - ac_compile_llvm_module(tm, ctx.ac.module, binary, config, shader_info, - MESA_SHADER_VERTEX, - dump_shader, options->supports_spill); + ac_compile_llvm_module(ac_llvm, ctx.ac.module, binary, config, shader_info, + MESA_SHADER_VERTEX, options); }
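
The compile-time tessellation sizing this patch introduces (get_tcs_num_patches() and calculate_tess_lds_size()) is easiest to follow with concrete numbers. Below is a minimal standalone sketch of that math, assuming an illustrative configuration: 3 input and 3 output control points, 8 per-vertex outputs, 2 patch outputs, CIK-class 64 KiB LDS, and an 8192-dword offchip block. None of these numbers come from the patch itself.

#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))
#define MIN2(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	/* Illustrative assumptions, not values taken from the patch. */
	unsigned num_tcs_input_cp = 3, num_tcs_output_cp = 3;
	unsigned num_tcs_inputs = 8, num_tcs_outputs = 8;
	unsigned num_tcs_patch_outputs = 2;
	unsigned hardware_lds_size = 65536;          /* chip_class >= CIK */
	unsigned tess_offchip_block_dw_size = 8192;

	/* Every attribute slot is one vec4 = 16 bytes. */
	unsigned input_vertex_size = num_tcs_inputs * 16;                  /* 128 */
	unsigned input_patch_size = num_tcs_input_cp * input_vertex_size;  /* 384 */
	unsigned output_vertex_size = num_tcs_outputs * 16;                /* 128 */
	unsigned pervertex_output_patch_size =
		num_tcs_output_cp * output_vertex_size;                    /* 384 */
	unsigned output_patch_size = pervertex_output_patch_size +
		num_tcs_patch_outputs * 16;                                /* 416 */

	/* One wave per SIMD; at most 256 in/out vertices per threadgroup. */
	unsigned num_patches =
		64 / MAX2(num_tcs_input_cp, num_tcs_output_cp) * 4;        /* 84 */
	/* Inputs and outputs must both fit in LDS. */
	num_patches = MIN2(num_patches,
		hardware_lds_size / (input_patch_size + output_patch_size)); /* 81 */
	/* Outputs must also fit in the offchip buffer. */
	num_patches = MIN2(num_patches,
		(tess_offchip_block_dw_size * 4) / output_patch_size);     /* 78 */
	/* Performance cap taken from the proprietary driver. */
	num_patches = MIN2(num_patches, 40);

	/* Same LDS layout as calculate_tess_lds_size(): all input
	 * patches first, then all output patches. */
	unsigned lds_size = input_patch_size * num_patches +
			    output_patch_size * num_patches;

	printf("num_patches = %u, LDS bytes = %u\n", num_patches, lds_size);
	return 0;
}

With these inputs the per-SIMD bound allows 84 patches, LDS allows 81, and the offchip buffer allows 78, so the fixed cap of 40 wins and the LDS footprint comes out to 40 * (384 + 416) = 32000 bytes.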