X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_nir_to_llvm.c;h=c7d772fa652bad4ba5d32dfe3a6f3914aba0e2de;hb=4cf8f329edb3ad3482819de8dc091061ae19c5af;hp=2162ca58e08a6e8b0f1637d471b8f5bec0050d3a;hpb=ea43d935ab765575994557d1f923b570d4bd9085;p=mesa.git

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 2162ca58e08..c7d772fa652 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -27,6 +27,7 @@
 #include "radv_private.h"
 #include "radv_shader.h"
+#include "radv_shader_helper.h"
 #include "nir/nir.h"
 
 #include <llvm-c/Core.h>
 #include <llvm-c/TargetMachine.h>
@@ -81,7 +82,6 @@ struct radv_shader_context {
	LLVMValueRef hs_ring_tess_offchip;
	LLVMValueRef hs_ring_tess_factor;
 
-	LLVMValueRef sample_pos_offset;
	LLVMValueRef persp_sample, persp_center, persp_centroid;
	LLVMValueRef linear_sample, linear_center, linear_centroid;
 
@@ -480,7 +480,7 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
		     unsigned num_return_elems,
		     struct arg_info *args,
		     unsigned max_workgroup_size,
-		     bool unsafe_math)
+		     const struct radv_nir_compiler_options *options)
 {
	LLVMTypeRef main_function_type, ret_type;
	LLVMBasicBlockRef main_function_body;
@@ -511,12 +511,18 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
		}
	}
 
+	if (options->address32_hi) {
+		ac_llvm_add_target_dep_function_attr(main_function,
+						     "amdgpu-32bit-address-high-bits",
+						     options->address32_hi);
+	}
+
	if (max_workgroup_size) {
		ac_llvm_add_target_dep_function_attr(main_function,
						     "amdgpu-max-work-group-size",
						     max_workgroup_size);
	}
-	if (unsafe_math) {
+	if (options->unsafe_math) {
		/* These were copied from some LLVM test. */
		LLVMAddTargetDependentFunctionAttr(main_function,
						   "less-precise-fpmad",
@@ -560,20 +566,31 @@ set_loc_shader(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx,
	set_loc(ud_info, sgpr_idx, num_sgprs, 0);
 }
 
+static void
+set_loc_shader_ptr(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx)
+{
+	bool use_32bit_pointers = HAVE_32BIT_POINTERS &&
+				  idx != AC_UD_SCRATCH_RING_OFFSETS;
+
+	set_loc_shader(ctx, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
+}
+
 static void
 set_loc_desc(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx,
	     uint32_t indirect_offset)
 {
-	struct radv_userdata_info *ud_info =
-		&ctx->shader_info->user_sgprs_locs.descriptor_sets[idx];
+	struct radv_userdata_locations *locs =
+		&ctx->shader_info->user_sgprs_locs;
+	struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx];
	assert(ud_info);
 
-	set_loc(ud_info, sgpr_idx, 2, indirect_offset);
+	set_loc(ud_info, sgpr_idx, HAVE_32BIT_POINTERS ? 1 : 2, indirect_offset);
+	if (indirect_offset == 0)
+		locs->descriptor_sets_enabled |= 1 << idx;
 }
 
 struct user_sgpr_info {
	bool need_ring_offsets;
-	uint8_t sgpr_count;
	bool indirect_all_descriptor_sets;
 };
 
@@ -606,7 +623,8 @@ count_vs_user_sgprs(struct radv_shader_context *ctx)
 {
	uint8_t count = 0;
 
-	count += ctx->shader_info->info.vs.has_vertex_buffers ? 2 : 0;
+	if (ctx->shader_info->info.vs.has_vertex_buffers)
+		count += HAVE_32BIT_POINTERS ? 1 : 2;
	count += ctx->shader_info->info.vs.needs_draw_id ? 3 : 2;
 
	return count;
@@ -619,6 +637,8 @@ static void allocate_user_sgprs(struct radv_shader_context *ctx,
				bool needs_view_index,
				struct user_sgpr_info *user_sgpr_info)
 {
+	uint8_t user_sgpr_count = 0;
+
	memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
 
	/* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */
@@ -635,25 +655,25 @@ static void allocate_user_sgprs(struct radv_shader_context *ctx,
 
	/* 2 user sgprs will nearly always be allocated for scratch/rings */
	if (ctx->options->supports_spill || user_sgpr_info->need_ring_offsets) {
-		user_sgpr_info->sgpr_count += 2;
+		user_sgpr_count += 2;
	}
 
	switch (stage) {
	case MESA_SHADER_COMPUTE:
		if (ctx->shader_info->info.cs.uses_grid_size)
-			user_sgpr_info->sgpr_count += 3;
+			user_sgpr_count += 3;
		break;
	case MESA_SHADER_FRAGMENT:
-		user_sgpr_info->sgpr_count += ctx->shader_info->info.ps.needs_sample_positions;
+		user_sgpr_count += ctx->shader_info->info.ps.needs_sample_positions;
		break;
	case MESA_SHADER_VERTEX:
		if (!ctx->is_gs_copy_shader)
-			user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx);
+			user_sgpr_count += count_vs_user_sgprs(ctx);
		break;
	case MESA_SHADER_TESS_CTRL:
		if (has_previous_stage) {
			if (previous_stage == MESA_SHADER_VERTEX)
-				user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx);
+				user_sgpr_count += count_vs_user_sgprs(ctx);
		}
		break;
	case MESA_SHADER_TESS_EVAL:
@@ -661,7 +681,7 @@ static void allocate_user_sgprs(struct radv_shader_context *ctx,
	case MESA_SHADER_GEOMETRY:
		if (has_previous_stage) {
			if (previous_stage == MESA_SHADER_VERTEX) {
-				user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx);
+				user_sgpr_count += count_vs_user_sgprs(ctx);
			}
		}
		break;
@@ -670,19 +690,18 @@ static void allocate_user_sgprs(struct radv_shader_context *ctx,
	}
 
	if (needs_view_index)
-		user_sgpr_info->sgpr_count++;
+		user_sgpr_count++;
 
	if (ctx->shader_info->info.loads_push_constants)
-		user_sgpr_info->sgpr_count += 2;
+		user_sgpr_count += HAVE_32BIT_POINTERS ? 1 : 2;
 
	uint32_t available_sgprs = ctx->options->chip_class >= GFX9 ? 32 : 16;
-	uint32_t remaining_sgprs = available_sgprs - user_sgpr_info->sgpr_count;
+	uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
+	uint32_t num_desc_set =
+		util_bitcount(ctx->shader_info->info.desc_set_used_mask);
 
-	if (remaining_sgprs / 2 < util_bitcount(ctx->shader_info->info.desc_set_used_mask)) {
-		user_sgpr_info->sgpr_count += 2;
+	if (remaining_sgprs / (HAVE_32BIT_POINTERS ? 1 : 2) < num_desc_set) {
		user_sgpr_info->indirect_all_descriptor_sets = true;
-	} else {
-		user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2;
	}
 }
 
@@ -695,7 +714,7 @@ declare_global_input_sgprs(struct radv_shader_context *ctx,
			   struct arg_info *args,
			   LLVMValueRef *desc_sets)
 {
-	LLVMTypeRef type = ac_array_in_const_addr_space(ctx->ac.i8);
+	LLVMTypeRef type = ac_array_in_const32_addr_space(ctx->ac.i8);
	unsigned num_sets = ctx->options->layout ?
		ctx->options->layout->num_sets : 0;
	unsigned stage_mask = 1 << stage;
@@ -713,7 +732,7 @@ declare_global_input_sgprs(struct radv_shader_context *ctx,
			}
		}
	} else {
-		add_array_arg(args, ac_array_in_const_addr_space(type), desc_sets);
+		add_array_arg(args, ac_array_in_const32_addr_space(type), desc_sets);
	}
 
	if (ctx->shader_info->info.loads_push_constants) {
@@ -733,7 +752,8 @@ declare_vs_specific_input_sgprs(struct radv_shader_context *ctx,
	    (stage == MESA_SHADER_VERTEX ||
	     (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
		if (ctx->shader_info->info.vs.has_vertex_buffers) {
-			add_arg(args, ARG_SGPR, ac_array_in_const_addr_space(ctx->ac.v4i32),
+			add_arg(args, ARG_SGPR,
+				ac_array_in_const32_addr_space(ctx->ac.v4i32),
				&ctx->vertex_buffers);
		}
		add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.base_vertex);
@@ -791,8 +811,8 @@ set_global_input_locs(struct radv_shader_context *ctx, gl_shader_stage stage,
			ctx->descriptor_sets[i] = NULL;
		}
	} else {
-		set_loc_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS,
-			       user_sgpr_idx, 2);
+		set_loc_shader_ptr(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS,
+				   user_sgpr_idx);
 
		for (unsigned i = 0; i < num_sets; ++i) {
			if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) &&
@@ -810,7 +830,7 @@ set_global_input_locs(struct radv_shader_context *ctx, gl_shader_stage stage,
	}
 
	if (ctx->shader_info->info.loads_push_constants) {
-		set_loc_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2);
+		set_loc_shader_ptr(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
	}
 }
 
@@ -824,8 +844,8 @@ set_vs_specific_input_locs(struct radv_shader_context *ctx,
	    (stage == MESA_SHADER_VERTEX ||
	     (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
		if (ctx->shader_info->info.vs.has_vertex_buffers) {
-			set_loc_shader(ctx, AC_UD_VS_VERTEX_BUFFERS,
-				       user_sgpr_idx, 2);
+			set_loc_shader_ptr(ctx, AC_UD_VS_VERTEX_BUFFERS,
+					   user_sgpr_idx);
		}
 
		unsigned vs_num = 2;
@@ -851,7 +871,7 @@ static void set_llvm_calling_convention(LLVMValueRef func,
		calling_conv = RADEON_LLVM_AMDGPU_GS;
		break;
	case MESA_SHADER_TESS_CTRL:
-		calling_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS : RADEON_LLVM_AMDGPU_VS;
+		calling_conv = RADEON_LLVM_AMDGPU_HS;
		break;
	case MESA_SHADER_FRAGMENT:
		calling_conv = RADEON_LLVM_AMDGPU_PS;
@@ -1078,10 +1098,6 @@ static void create_function(struct radv_shader_context *ctx,
					   previous_stage, &user_sgpr_info,
					   &args, &desc_sets);
 
-		if (ctx->shader_info->info.ps.needs_sample_positions)
-			add_arg(&args, ARG_SGPR, ctx->ac.i32,
-				&ctx->sample_pos_offset);
-
		add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->abi.prim_mask);
		add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_sample);
		add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_center);
@@ -1106,8 +1122,7 @@ static void create_function(struct radv_shader_context *ctx,
 
	ctx->main_function = create_llvm_function(
	    ctx->context, ctx->ac.module, ctx->ac.builder, NULL, 0, &args,
-	    ctx->max_workgroup_size,
-	    ctx->options->unsafe_math);
+	    ctx->max_workgroup_size, ctx->options);
	set_llvm_calling_convention(ctx->main_function, stage);
 
 
@@ -1124,8 +1139,8 @@ static void create_function(struct radv_shader_context *ctx,
	user_sgpr_idx = 0;
 
	if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets) {
-		set_loc_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS,
-			       &user_sgpr_idx, 2);
+		set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS,
+				   &user_sgpr_idx);
		if (ctx->options->supports_spill) {
			ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
							       LLVMPointerType(ctx->ac.i8, AC_CONST_ADDR_SPACE),
@@ -1178,10 +1193,6 @@ static void create_function(struct radv_shader_context *ctx,
			set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
		break;
	case MESA_SHADER_FRAGMENT:
-		if (ctx->shader_info->info.ps.needs_sample_positions) {
-			set_loc_shader(ctx, AC_UD_PS_SAMPLE_POS_OFFSET,
-				       &user_sgpr_idx, 1);
-		}
		break;
	default:
		unreachable("Shader stage not implemented");
@@ -1611,6 +1622,30 @@ static LLVMValueRef lookup_interp_param(struct ac_shader_abi *abi,
	return NULL;
 }
 
+static uint32_t
+radv_get_sample_pos_offset(uint32_t num_samples)
+{
+	uint32_t sample_pos_offset = 0;
+
+	switch (num_samples) {
+	case 2:
+		sample_pos_offset = 1;
+		break;
+	case 4:
+		sample_pos_offset = 3;
+		break;
+	case 8:
+		sample_pos_offset = 7;
+		break;
+	case 16:
+		sample_pos_offset = 15;
+		break;
+	default:
+		break;
+	}
+	return sample_pos_offset;
+}
+
 static LLVMValueRef load_sample_position(struct ac_shader_abi *abi,
					 LLVMValueRef sample_id)
 {
@@ -1622,7 +1657,12 @@ static LLVMValueRef load_sample_position(struct ac_shader_abi *abi,
	ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
			       ac_array_in_const_addr_space(ctx->ac.v2f32), "");
 
-	sample_id = LLVMBuildAdd(ctx->ac.builder, sample_id, ctx->sample_pos_offset, "");
+	uint32_t sample_pos_offset =
+		radv_get_sample_pos_offset(ctx->options->key.fs.num_samples);
+
+	sample_id =
+		LLVMBuildAdd(ctx->ac.builder, sample_id,
+			     LLVMConstInt(ctx->ac.i32, sample_pos_offset, false), "");
	result = ac_build_load_invariant(&ctx->ac, ptr, sample_id);
 
	return result;
@@ -1632,9 +1672,14 @@ static LLVMValueRef load_sample_position(struct ac_shader_abi *abi,
 static LLVMValueRef load_sample_mask_in(struct ac_shader_abi *abi)
 {
	struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
-	uint8_t log2_ps_iter_samples = ctx->shader_info->info.ps.force_persample ?
-		ctx->options->key.fs.log2_num_samples :
-		ctx->options->key.fs.log2_ps_iter_samples;
+	uint8_t log2_ps_iter_samples;
+
+	if (ctx->shader_info->info.ps.force_persample) {
+		log2_ps_iter_samples =
+			util_logbase2(ctx->options->key.fs.num_samples);
+	} else {
+		log2_ps_iter_samples = ctx->options->key.fs.log2_ps_iter_samples;
+	}
 
	/* The bit pattern matches that used by fixed function fragment
	 * processing. */
@@ -1684,6 +1729,8 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addr
	/* loop num outputs */
	idx = 0;
	for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+		unsigned output_usage_mask =
+			ctx->shader_info->info.gs.output_usage_mask[i];
		LLVMValueRef *out_ptr = &addrs[i * 4];
		int length = 4;
		int slot = idx;
@@ -1697,8 +1744,13 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addr
			length = ctx->num_output_clips + ctx->num_output_culls;
			if (length > 4)
				slot_inc = 2;
+			output_usage_mask = (1 << length) - 1;
		}
+
		for (unsigned j = 0; j < length; j++) {
+			if (!(output_usage_mask & (1 << j)))
+				continue;
+
			LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder,
							     out_ptr[j], "");
			LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, (slot * 4 + j) * ctx->gs_max_out_vertices, false);
@@ -1860,7 +1912,8 @@ static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi,
	index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), "");
 
	list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, offset, 0));
-	list = LLVMBuildPointerCast(builder, list, ac_array_in_const_addr_space(type), "");
+	list = LLVMBuildPointerCast(builder, list,
+				    ac_array_in_const32_addr_space(type), "");
 
	return ac_build_load_to_sgpr(&ctx->ac, list, index);
 }
@@ -2069,9 +2122,6 @@ static void
 prepare_interp_optimize(struct radv_shader_context *ctx,
                         struct nir_shader *nir)
 {
-	if (!ctx->options->key.fs.multisample)
-		return;
-
	bool uses_center = false;
	bool uses_centroid = false;
	nir_foreach_variable(variable, &nir->inputs) {
@@ -2493,10 +2543,9 @@ handle_vs_outputs_post(struct radv_shader_context *ctx,
			output_usage_mask =
				ctx->shader_info->info.tes.output_usage_mask[i];
		} else {
-			/* Enable all channels for the GS copy shader because
-			 * we don't know the output usage mask currently.
-			 */
-			output_usage_mask = 0xf;
+			assert(ctx->is_gs_copy_shader);
+			output_usage_mask =
+				ctx->shader_info->info.gs.output_usage_mask[i];
		}
 
		radv_export_param(ctx, param_count, values, output_usage_mask);
@@ -2576,14 +2625,26 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
	for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
		LLVMValueRef dw_addr = NULL;
		LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4];
+		unsigned output_usage_mask;
		int param_index;
		int length = 4;
 
		if (!(ctx->output_mask & (1ull << i)))
			continue;
 
-		if (i == VARYING_SLOT_CLIP_DIST0)
+		if (ctx->stage == MESA_SHADER_VERTEX) {
+			output_usage_mask =
+				ctx->shader_info->info.vs.output_usage_mask[i];
+		} else {
+			assert(ctx->stage == MESA_SHADER_TESS_EVAL);
+			output_usage_mask =
+				ctx->shader_info->info.tes.output_usage_mask[i];
+		}
+
+		if (i == VARYING_SLOT_CLIP_DIST0) {
			length = ctx->num_output_clips + ctx->num_output_culls;
+			output_usage_mask = (1 << length) - 1;
+		}
 
		param_index = shader_io_get_unique_index(i);
 
@@ -2592,14 +2653,22 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
					       LLVMConstInt(ctx->ac.i32, param_index * 4, false), "");
		}
+
		for (j = 0; j < length; j++) {
+			if (!(output_usage_mask & (1 << j)))
+				continue;
+
			LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder,
							     out_ptr[j], "");
			out_val = LLVMBuildBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
 
			if (ctx->ac.chip_class >= GFX9) {
-				ac_lds_store(&ctx->ac, dw_addr,
+				LLVMValueRef dw_addr_offset =
+					LLVMBuildAdd(ctx->ac.builder, dw_addr,
+						     LLVMConstInt(ctx->ac.i32,
+								  j, false), "");
+
+				ac_lds_store(&ctx->ac, dw_addr_offset,
					     LLVMBuildLoad(ctx->ac.builder, out_ptr[j], ""));
-				dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, ctx->ac.i32_1, "");
			} else {
				ac_build_buffer_store_dword(&ctx->ac,
							    ctx->esgs_ring,
@@ -2927,30 +2996,12 @@ handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs,
	}
 }
 
-static void ac_llvm_finalize_module(struct radv_shader_context *ctx)
+static void ac_llvm_finalize_module(struct radv_shader_context *ctx,
+				    LLVMPassManagerRef passmgr,
+				    const struct radv_nir_compiler_options *options)
 {
-	LLVMPassManagerRef passmgr;
-	/* Create the pass manager */
-	passmgr = LLVMCreateFunctionPassManagerForModule(
-							ctx->ac.module);
-
-	/* This pass should eliminate all the load and store instructions */
-	LLVMAddPromoteMemoryToRegisterPass(passmgr);
-
-	/* Add some optimization passes */
-	LLVMAddScalarReplAggregatesPass(passmgr);
-	LLVMAddLICMPass(passmgr);
-	LLVMAddAggressiveDCEPass(passmgr);
-	LLVMAddCFGSimplificationPass(passmgr);
-	LLVMAddInstructionCombiningPass(passmgr);
-
-	/* Run the pass */
-	LLVMInitializeFunctionPassManager(passmgr);
-	LLVMRunFunctionPassManager(passmgr, ctx->main_function);
-	LLVMFinalizeFunctionPassManager(passmgr);
-
+	LLVMRunPassManager(passmgr, ctx->ac.module);
	LLVMDisposeBuilder(ctx->ac.builder);
-	LLVMDisposePassManager(passmgr);
 
	ac_llvm_context_dispose(&ctx->ac);
 }
@@ -2991,9 +3042,16 @@ ac_nir_eliminate_const_vs_outputs(struct radv_shader_context *ctx)
 static void
 ac_setup_rings(struct radv_shader_context *ctx)
 {
-	if ((ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) ||
-	    (ctx->stage == MESA_SHADER_TESS_EVAL && ctx->options->key.tes.as_es)) {
-		ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_VS, false));
+	if (ctx->options->chip_class <= VI &&
+	    (ctx->stage == MESA_SHADER_GEOMETRY ||
+	     ctx->options->key.vs.as_es || ctx->options->key.tes.as_es)) {
+		unsigned ring = ctx->stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS
+								   : RING_ESGS_VS;
+		LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, ring, false);
+
+		ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac,
+						       ctx->ring_offsets,
+						       offset);
	}
 
	if (ctx->is_gs_copy_shader) {
@@ -3004,7 +3062,6 @@ ac_setup_rings(struct radv_shader_context *ctx)
		uint32_t num_entries = 64;
		LLVMValueRef gsvs_ring_stride = LLVMConstInt(ctx->ac.i32, ctx->max_gsvs_emit_size, false);
		LLVMValueRef gsvs_ring_desc = LLVMConstInt(ctx->ac.i32, ctx->max_gsvs_emit_size << 16, false);
-		ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_GS, false));
		ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_GSVS_GS, false));
 
		ctx->gsvs_ring = LLVMBuildBitCast(ctx->ac.builder, ctx->gsvs_ring, ctx->ac.v4i32, "");
@@ -3055,7 +3112,6 @@ static void ac_nir_fixup_ls_hs_input_vgprs(struct radv_shader_context *ctx)
 
	LLVMValueRef hs_empty = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, count, ctx->ac.i32_0, "");
	ctx->abi.instance_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->rel_auto_id, ctx->abi.instance_id, "");
-	ctx->vs_prim_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.vertex_id, ctx->vs_prim_id, "");
	ctx->rel_auto_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_rel_ids, ctx->rel_auto_id, "");
	ctx->abi.vertex_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_patch_id, ctx->abi.vertex_id, "");
 }
@@ -3075,7 +3131,7 @@ static void prepare_gs_input_vgprs(struct radv_shader_context *ctx)
 
 
 static
-LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
+LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
                                        struct nir_shader *const *shaders,
                                        int shader_count,
                                        struct radv_shader_variant_info *shader_info,
@@ -3085,18 +3141,10 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
	unsigned i;
	ctx.options = options;
	ctx.shader_info = shader_info;
 
-	ctx.context = LLVMContextCreate();
-	ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class,
-			     options->family);
-	ctx.ac.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
-	LLVMSetTarget(ctx.ac.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--");
-
-	LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
-	char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
-	LLVMSetDataLayout(ctx.ac.module, data_layout_str);
-	LLVMDisposeTargetData(data_layout);
-	LLVMDisposeMessage(data_layout_str);
+	ac_llvm_context_init(&ctx.ac, options->chip_class, options->family);
+	ctx.context = ctx.ac.context;
+	ctx.ac.module = ac_create_module(ac_llvm->tm, ctx.context);
 
	enum ac_float_mode float_mode =
		options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
@@ -3251,7 +3299,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
	if (options->dump_preoptir)
		ac_dump_module(ctx.ac.module);
 
-	ac_llvm_finalize_module(&ctx);
+	ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, options);
 
	if (shader_count == 1)
		ac_nir_eliminate_const_vs_outputs(&ctx);
@@ -3281,15 +3329,10 @@ static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
 
 static unsigned ac_llvm_compile(LLVMModuleRef M,
                                 struct ac_shader_binary *binary,
-                                LLVMTargetMachineRef tm)
+                                struct ac_llvm_compiler *ac_llvm)
 {
	unsigned retval = 0;
-	char *err;
	LLVMContextRef llvm_ctx;
-	LLVMMemoryBufferRef out_buffer;
-	unsigned buffer_size;
-	const char *buffer_data;
-	LLVMBool mem_err;
 
	/* Setup Diagnostic Handler*/
	llvm_ctx = LLVMGetModuleContext(M);
@@ -3298,31 +3341,12 @@ static unsigned ac_llvm_compile(LLVMModuleRef M,
	                                &retval);
 
	/* Compile IR*/
-	mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile,
-	                                              &err, &out_buffer);
-
-	/* Process Errors/Warnings */
-	if (mem_err) {
-		fprintf(stderr, "%s: %s", __FUNCTION__, err);
-		free(err);
+	if (!radv_compile_to_binary(ac_llvm, M, binary))
		retval = 1;
-		goto out;
-	}
-
-	/* Extract Shader Code*/
-	buffer_size = LLVMGetBufferSize(out_buffer);
-	buffer_data = LLVMGetBufferStart(out_buffer);
-
-	ac_elf_read(buffer_data, buffer_size, binary);
-
-	/* Clean up */
-	LLVMDisposeMemoryBuffer(out_buffer);
-
-out:
	return retval;
 }
 
-static void ac_compile_llvm_module(LLVMTargetMachineRef tm,
+static void ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm,
				   LLVMModuleRef llvm_module,
				   struct ac_shader_binary *binary,
				   struct ac_shader_config *config,
@@ -3341,7 +3365,7 @@ static void ac_compile_llvm_module(LLVMTargetMachineRef tm,
		LLVMDisposeMessage(llvm_ir);
	}
 
-	int v = ac_llvm_compile(llvm_module, binary, tm);
+	int v = ac_llvm_compile(llvm_module, binary, ac_llvm);
	if (v) {
		fprintf(stderr, "compile failed\n");
	}
@@ -3451,7 +3475,7 @@ ac_fill_shader_info(struct radv_shader_variant_info *shader_info, struct nir_sha
 }
 
 void
-radv_compile_nir_shader(LLVMTargetMachineRef tm,
+radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm,
			struct ac_shader_binary *binary,
			struct ac_shader_config *config,
			struct radv_shader_variant_info *shader_info,
@@ -3462,10 +3486,10 @@ radv_compile_nir_shader(LLVMTargetMachineRef tm,
 
	LLVMModuleRef llvm_module;
 
-	llvm_module = ac_translate_nir_to_llvm(tm, nir, nir_count, shader_info,
+	llvm_module = ac_translate_nir_to_llvm(ac_llvm, nir, nir_count, shader_info,
					       options);
 
-	ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info,
+	ac_compile_llvm_module(ac_llvm, llvm_module, binary, config, shader_info,
			       nir[0]->info.stage, options);
 
	for (int i = 0; i < nir_count; ++i)
@@ -3523,7 +3547,7 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx)
 }
 
 void
-radv_compile_gs_copy_shader(LLVMTargetMachineRef tm,
+radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
			    struct nir_shader *geom_shader,
			    struct ac_shader_binary *binary,
			    struct ac_shader_config *config,
@@ -3531,16 +3555,14 @@ radv_compile_gs_copy_shader(LLVMTargetMachineRef tm,
			    const struct radv_nir_compiler_options *options)
 {
	struct radv_shader_context ctx = {0};
-	ctx.context = LLVMContextCreate();
	ctx.options = options;
	ctx.shader_info = shader_info;
 
-	ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class,
-			     options->family);
-	ctx.ac.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
+	ac_llvm_context_init(&ctx.ac, options->chip_class, options->family);
+	ctx.context = ctx.ac.context;
+	ctx.ac.module = ac_create_module(ac_llvm->tm, ctx.context);
 
	ctx.is_gs_copy_shader = true;
-	LLVMSetTarget(ctx.ac.module, "amdgcn--");
 
	enum ac_float_mode float_mode =
		options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
@@ -3569,8 +3591,8 @@ radv_compile_gs_copy_shader(LLVMTargetMachineRef tm,
 
	LLVMBuildRetVoid(ctx.ac.builder);
 
-	ac_llvm_finalize_module(&ctx);
+	ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, options);
 
-	ac_compile_llvm_module(tm, ctx.ac.module, binary, config, shader_info,
+	ac_compile_llvm_module(ac_llvm, ctx.ac.module, binary, config, shader_info,
			       MESA_SHADER_VERTEX, options);
 }