X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fcommon%2Fac_nir_to_llvm.c;h=ecddb5e9b9b10923e4223db8c0df6b5f72f74605;hb=4ba201ee361a07a084665e022efd6b0d6df2ffd6;hp=cfbdeae1a3b08ed3a6e30c59712707d8da04bcff;hpb=931a8d0c9a15df462f14ab40f9ae31c8ecf75376;p=mesa.git diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index cfbdeae1a3b..ecddb5e9b9b 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -30,6 +30,9 @@ #include "../vulkan/radv_descriptor_set.h" #include "util/bitscan.h" #include +#include "ac_shader_abi.h" +#include "ac_shader_info.h" +#include "ac_exp_param.h" enum radeon_llvm_calling_convention { RADEON_LLVM_AMDGPU_VS = 87, @@ -44,18 +47,38 @@ enum radeon_llvm_calling_convention { #define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1) #define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1) -enum desc_type { - DESC_IMAGE, - DESC_FMASK, - DESC_SAMPLER, - DESC_BUFFER, +struct nir_to_llvm_context; + +struct ac_nir_context { + struct ac_llvm_context ac; + struct ac_shader_abi *abi; + + gl_shader_stage stage; + + struct hash_table *defs; + struct hash_table *phis; + struct hash_table *vars; + + LLVMValueRef main_function; + LLVMBasicBlockRef continue_block; + LLVMBasicBlockRef break_block; + + LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4]; + + int num_locals; + LLVMValueRef *locals; + + struct nir_to_llvm_context *nctx; /* TODO get rid of this */ }; struct nir_to_llvm_context { struct ac_llvm_context ac; const struct ac_nir_compiler_options *options; struct ac_shader_variant_info *shader_info; + struct ac_shader_abi abi; + struct ac_nir_context *nir; + unsigned max_workgroup_size; LLVMContextRef context; LLVMModuleRef module; LLVMBuilderRef builder; @@ -73,16 +96,24 @@ struct nir_to_llvm_context { LLVMValueRef tg_size; LLVMValueRef vertex_buffers; - LLVMValueRef base_vertex; - LLVMValueRef start_instance; - LLVMValueRef draw_index; - LLVMValueRef vertex_id; LLVMValueRef 
rel_auto_id; LLVMValueRef vs_prim_id; - LLVMValueRef instance_id; - + LLVMValueRef ls_out_layout; LLVMValueRef es2gs_offset; + LLVMValueRef tcs_offchip_layout; + LLVMValueRef tcs_out_offsets; + LLVMValueRef tcs_out_layout; + LLVMValueRef tcs_in_layout; + LLVMValueRef oc_lds; + LLVMValueRef tess_factor_offset; + LLVMValueRef tcs_patch_id; + LLVMValueRef tcs_rel_ids; + LLVMValueRef tes_rel_patch_id; + LLVMValueRef tes_patch_id; + LLVMValueRef tes_u; + LLVMValueRef tes_v; + LLVMValueRef gsvs_ring_stride; LLVMValueRef gsvs_num_entries; LLVMValueRef gs2vs_offset; @@ -92,9 +123,11 @@ struct nir_to_llvm_context { LLVMValueRef esgs_ring; LLVMValueRef gsvs_ring; + LLVMValueRef hs_ring_tess_offchip; + LLVMValueRef hs_ring_tess_factor; LLVMValueRef prim_mask; - LLVMValueRef sample_positions; + LLVMValueRef sample_pos_offset; LLVMValueRef persp_sample, persp_center, persp_centroid; LLVMValueRef linear_sample, linear_center, linear_centroid; LLVMValueRef front_face; @@ -102,9 +135,6 @@ struct nir_to_llvm_context { LLVMValueRef sample_coverage; LLVMValueRef frag_pos[4]; - LLVMBasicBlockRef continue_block; - LLVMBasicBlockRef break_block; - LLVMTypeRef i1; LLVMTypeRef i8; LLVMTypeRef i16; @@ -119,7 +149,6 @@ struct nir_to_llvm_context { LLVMTypeRef f16; LLVMTypeRef v2f32; LLVMTypeRef v4f32; - LLVMTypeRef v16i8; LLVMTypeRef voidt; LLVMValueRef i1true; @@ -136,16 +165,9 @@ struct nir_to_llvm_context { LLVMValueRef lds; LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4]; - LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4]; - LLVMValueRef shared_memory; uint64_t input_mask; uint64_t output_mask; - int num_locals; - LLVMValueRef *locals; - bool has_ddxy; - uint8_t num_input_clips; - uint8_t num_input_culls; uint8_t num_output_clips; uint8_t num_output_culls; @@ -154,11 +176,24 @@ struct nir_to_llvm_context { bool is_gs_copy_shader; LLVMValueRef gs_next_vertex; unsigned gs_max_out_vertices; + + unsigned tes_primitive_mode; + uint64_t tess_outputs_written; + uint64_t 
tess_patch_outputs_written; }; -static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx, - nir_deref_var *deref, - enum desc_type desc_type); +static inline struct nir_to_llvm_context * +nir_to_llvm_context_from_abi(struct ac_shader_abi *abi) +{ + struct nir_to_llvm_context *ctx = NULL; + return container_of(abi, ctx, abi); +} + +static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, + const nir_deref_var *deref, + enum ac_descriptor_type desc_type, + bool image, bool write); + static unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan) { return (index * 4) + chan; @@ -166,16 +201,21 @@ static unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan) static unsigned shader_io_get_unique_index(gl_varying_slot slot) { + /* handle patch indices separate */ + if (slot == VARYING_SLOT_TESS_LEVEL_OUTER) + return 0; + if (slot == VARYING_SLOT_TESS_LEVEL_INNER) + return 1; + if (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_TESS_MAX) + return 2 + (slot - VARYING_SLOT_PATCH0); + if (slot == VARYING_SLOT_POS) return 0; if (slot == VARYING_SLOT_PSIZ) return 1; - if (slot == VARYING_SLOT_CLIP_DIST0 || - slot == VARYING_SLOT_CULL_DIST0) + if (slot == VARYING_SLOT_CLIP_DIST0) return 2; - if (slot == VARYING_SLOT_CLIP_DIST1 || - slot == VARYING_SLOT_CULL_DIST1) - return 3; + /* 3 is reserved for clip dist as well */ if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31) return 4 + (slot - VARYING_SLOT_VAR0); unreachable("illegal slot in get unique index\n"); @@ -228,12 +268,83 @@ static void set_llvm_calling_convention(LLVMValueRef func, LLVMSetFunctionCallConv(func, calling_conv); } +#define MAX_ARGS 23 +struct arg_info { + LLVMTypeRef types[MAX_ARGS]; + LLVMValueRef *assign[MAX_ARGS]; + unsigned array_params_mask; + uint8_t count; + uint8_t user_sgpr_count; + uint8_t sgpr_count; + uint8_t num_user_sgprs_used; + uint8_t num_sgprs_used; + uint8_t num_vgprs_used; +}; + +static inline void +add_argument(struct arg_info *info, 
+ LLVMTypeRef type, LLVMValueRef *param_ptr) +{ + assert(info->count < MAX_ARGS); + info->assign[info->count] = param_ptr; + info->types[info->count] = type; + info->count++; +} + +static inline void +add_sgpr_argument(struct arg_info *info, + LLVMTypeRef type, LLVMValueRef *param_ptr) +{ + add_argument(info, type, param_ptr); + info->num_sgprs_used += llvm_get_type_size(type) / 4; + info->sgpr_count++; +} + +static inline void +add_user_sgpr_argument(struct arg_info *info, + LLVMTypeRef type, + LLVMValueRef *param_ptr) +{ + add_sgpr_argument(info, type, param_ptr); + info->num_user_sgprs_used += llvm_get_type_size(type) / 4; + info->user_sgpr_count++; +} + +static inline void +add_vgpr_argument(struct arg_info *info, + LLVMTypeRef type, + LLVMValueRef *param_ptr) +{ + add_argument(info, type, param_ptr); + info->num_vgprs_used += llvm_get_type_size(type) / 4; +} + +static inline void +add_user_sgpr_array_argument(struct arg_info *info, + LLVMTypeRef type, + LLVMValueRef *param_ptr) +{ + info->array_params_mask |= (1 << info->count); + add_user_sgpr_argument(info, type, param_ptr); +} + +static void assign_arguments(LLVMValueRef main_function, + struct arg_info *info) +{ + unsigned i; + for (i = 0; i < info->count; i++) { + if (info->assign[i]) + *info->assign[i] = LLVMGetParam(main_function, i); + } +} + static LLVMValueRef create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module, LLVMBuilderRef builder, LLVMTypeRef *return_types, - unsigned num_return_elems, LLVMTypeRef *param_types, - unsigned param_count, unsigned array_params_mask, - unsigned sgpr_params, bool unsafe_math) + unsigned num_return_elems, + struct arg_info *args, + unsigned max_workgroup_size, + bool unsafe_math) { LLVMTypeRef main_function_type, ret_type; LLVMBasicBlockRef main_function_body; @@ -246,7 +357,7 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module, /* Setup the function */ main_function_type = - LLVMFunctionType(ret_type, param_types, param_count, 0); + 
LLVMFunctionType(ret_type, args->types, args->count, 0); LLVMValueRef main_function = LLVMAddFunction(module, "main", main_function_type); main_function_body = @@ -254,8 +365,8 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module, LLVMPositionBuilderAtEnd(builder, main_function_body); LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS); - for (unsigned i = 0; i < sgpr_params; ++i) { - if (array_params_mask & (1 << i)) { + for (unsigned i = 0; i < args->sgpr_count; ++i) { + if (args->array_params_mask & (1 << i)) { LLVMValueRef P = LLVMGetParam(main_function, i); ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_BYVAL); ac_add_attr_dereferenceable(P, UINT64_MAX); @@ -265,6 +376,11 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module, } } + if (max_workgroup_size) { + ac_llvm_add_target_dep_function_attr(main_function, + "amdgpu-max-work-group-size", + max_workgroup_size); + } if (unsafe_math) { /* These were copied from some LLVM test. */ LLVMAddTargetDependentFunctionAttr(main_function, @@ -289,24 +405,7 @@ static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements) CONST_ADDR_SPACE); } -static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx, - int idx, - LLVMTypeRef type) -{ - LLVMValueRef offset; - LLVMValueRef ptr; - int addr_space; - - offset = LLVMConstInt(ctx->i32, idx * 16, false); - - ptr = ctx->shared_memory; - ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, ""); - addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); - ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), ""); - return ptr; -} - -static LLVMTypeRef to_integer_type_scalar(struct nir_to_llvm_context *ctx, LLVMTypeRef t) +static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t) { if (t == ctx->f16 || t == ctx->i16) return ctx->i16; @@ -318,7 +417,7 @@ static LLVMTypeRef to_integer_type_scalar(struct nir_to_llvm_context *ctx, LLVMT unreachable("Unhandled 
integer size"); } -static LLVMTypeRef to_integer_type(struct nir_to_llvm_context *ctx, LLVMTypeRef t) +static LLVMTypeRef to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t) { if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) { LLVMTypeRef elem_type = LLVMGetElementType(t); @@ -328,13 +427,13 @@ static LLVMTypeRef to_integer_type(struct nir_to_llvm_context *ctx, LLVMTypeRef return to_integer_type_scalar(ctx, t); } -static LLVMValueRef to_integer(struct nir_to_llvm_context *ctx, LLVMValueRef v) +static LLVMValueRef to_integer(struct ac_llvm_context *ctx, LLVMValueRef v) { LLVMTypeRef type = LLVMTypeOf(v); return LLVMBuildBitCast(ctx->builder, v, to_integer_type(ctx, type), ""); } -static LLVMTypeRef to_float_type_scalar(struct nir_to_llvm_context *ctx, LLVMTypeRef t) +static LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t) { if (t == ctx->i16 || t == ctx->f16) return ctx->f16; @@ -346,7 +445,7 @@ static LLVMTypeRef to_float_type_scalar(struct nir_to_llvm_context *ctx, LLVMTyp unreachable("Unhandled float size"); } -static LLVMTypeRef to_float_type(struct nir_to_llvm_context *ctx, LLVMTypeRef t) +static LLVMTypeRef to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t) { if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) { LLVMTypeRef elem_type = LLVMGetElementType(t); @@ -356,13 +455,13 @@ static LLVMTypeRef to_float_type(struct nir_to_llvm_context *ctx, LLVMTypeRef t) return to_float_type_scalar(ctx, t); } -static LLVMValueRef to_float(struct nir_to_llvm_context *ctx, LLVMValueRef v) +static LLVMValueRef to_float(struct ac_llvm_context *ctx, LLVMValueRef v) { LLVMTypeRef type = LLVMTypeOf(v); return LLVMBuildBitCast(ctx->builder, v, to_float_type(ctx, type), ""); } -static int get_elem_bits(struct nir_to_llvm_context *ctx, LLVMTypeRef type) +static int get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type) { if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) type = LLVMGetElementType(type); @@ -397,21 +496,126 @@ static 
LLVMValueRef unpack_param(struct nir_to_llvm_context *ctx, return value; } -static void set_userdata_location(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs) +static LLVMValueRef get_rel_patch_id(struct nir_to_llvm_context *ctx) { - ud_info->sgpr_idx = sgpr_idx; + switch (ctx->stage) { + case MESA_SHADER_TESS_CTRL: + return unpack_param(ctx, ctx->tcs_rel_ids, 0, 8); + case MESA_SHADER_TESS_EVAL: + return ctx->tes_rel_patch_id; + break; + default: + unreachable("Illegal stage"); + } +} + +/* Tessellation shaders pass outputs to the next shader using LDS. + * + * LS outputs = TCS inputs + * TCS outputs = TES inputs + * + * The LDS layout is: + * - TCS inputs for patch 0 + * - TCS inputs for patch 1 + * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2) + * - ... + * - TCS outputs for patch 0 = get_tcs_out_patch0_offset + * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset + * - TCS outputs for patch 1 + * - Per-patch TCS outputs for patch 1 + * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2) + * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2) + * - ... + * + * All three shaders VS(LS), TCS, TES share the same LDS space. 
+ */ +static LLVMValueRef +get_tcs_in_patch_stride(struct nir_to_llvm_context *ctx) +{ + if (ctx->stage == MESA_SHADER_VERTEX) + return unpack_param(ctx, ctx->ls_out_layout, 0, 13); + else if (ctx->stage == MESA_SHADER_TESS_CTRL) + return unpack_param(ctx, ctx->tcs_in_layout, 0, 13); + else { + assert(0); + return NULL; + } +} + +static LLVMValueRef +get_tcs_out_patch_stride(struct nir_to_llvm_context *ctx) +{ + return unpack_param(ctx, ctx->tcs_out_layout, 0, 13); +} + +static LLVMValueRef +get_tcs_out_patch0_offset(struct nir_to_llvm_context *ctx) +{ + return LLVMBuildMul(ctx->builder, + unpack_param(ctx, ctx->tcs_out_offsets, 0, 16), + LLVMConstInt(ctx->i32, 4, false), ""); +} + +static LLVMValueRef +get_tcs_out_patch0_patch_data_offset(struct nir_to_llvm_context *ctx) +{ + return LLVMBuildMul(ctx->builder, + unpack_param(ctx, ctx->tcs_out_offsets, 16, 16), + LLVMConstInt(ctx->i32, 4, false), ""); +} + +static LLVMValueRef +get_tcs_in_current_patch_offset(struct nir_to_llvm_context *ctx) +{ + LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx); + LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); + + return LLVMBuildMul(ctx->builder, patch_stride, rel_patch_id, ""); +} + +static LLVMValueRef +get_tcs_out_current_patch_offset(struct nir_to_llvm_context *ctx) +{ + LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx); + LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx); + LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); + + return LLVMBuildAdd(ctx->builder, patch0_offset, + LLVMBuildMul(ctx->builder, patch_stride, + rel_patch_id, ""), + ""); +} + +static LLVMValueRef +get_tcs_out_current_patch_data_offset(struct nir_to_llvm_context *ctx) +{ + LLVMValueRef patch0_patch_data_offset = + get_tcs_out_patch0_patch_data_offset(ctx); + LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx); + LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); + + return LLVMBuildAdd(ctx->builder, patch0_patch_data_offset, + LLVMBuildMul(ctx->builder, patch_stride, 
+ rel_patch_id, ""), + ""); +} + +static void set_userdata_location(struct ac_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs) +{ + ud_info->sgpr_idx = *sgpr_idx; ud_info->num_sgprs = num_sgprs; ud_info->indirect = false; ud_info->indirect_offset = 0; + *sgpr_idx += num_sgprs; } static void set_userdata_location_shader(struct nir_to_llvm_context *ctx, - int idx, uint8_t sgpr_idx, uint8_t num_sgprs) + int idx, uint8_t *sgpr_idx, uint8_t num_sgprs) { set_userdata_location(&ctx->shader_info->user_sgprs_locs.shader_data[idx], sgpr_idx, num_sgprs); } -#if 0 + static void set_userdata_location_indirect(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs, uint32_t indirect_offset) { @@ -420,242 +624,302 @@ static void set_userdata_location_indirect(struct ac_userdata_info *ud_info, uin ud_info->indirect = true; ud_info->indirect_offset = indirect_offset; } -#endif -static void create_function(struct nir_to_llvm_context *ctx) +static void declare_tess_lds(struct nir_to_llvm_context *ctx) { - LLVMTypeRef arg_types[23]; - unsigned arg_idx = 0; - unsigned array_params_mask = 0; - unsigned sgpr_count = 0, user_sgpr_count; - unsigned i; - unsigned num_sets = ctx->options->layout ? ctx->options->layout->num_sets : 0; - unsigned user_sgpr_idx; - bool need_push_constants; - bool need_ring_offsets = false; + unsigned lds_size = ctx->options->chip_class >= CIK ? 
65536 : 32768; + ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32zero, + LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), LOCAL_ADDR_SPACE), + "tess_lds"); +} + +struct user_sgpr_info { + bool need_ring_offsets; + uint8_t sgpr_count; + bool indirect_all_descriptor_sets; +}; + +static void allocate_user_sgprs(struct nir_to_llvm_context *ctx, + struct user_sgpr_info *user_sgpr_info) +{ + memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info)); /* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */ if (ctx->stage == MESA_SHADER_GEOMETRY || ctx->stage == MESA_SHADER_VERTEX || + ctx->stage == MESA_SHADER_TESS_CTRL || + ctx->stage == MESA_SHADER_TESS_EVAL || ctx->is_gs_copy_shader) - need_ring_offsets = true; + user_sgpr_info->need_ring_offsets = true; - need_push_constants = true; - if (!ctx->options->layout) - need_push_constants = false; - else if (!ctx->options->layout->push_constant_size && - !ctx->options->layout->dynamic_offset_count) - need_push_constants = false; + if (ctx->stage == MESA_SHADER_FRAGMENT && + ctx->shader_info->info.ps.needs_sample_positions) + user_sgpr_info->need_ring_offsets = true; - if (need_ring_offsets && !ctx->options->supports_spill) { - arg_types[arg_idx++] = const_array(ctx->v16i8, 8); /* address of rings */ + /* 2 user sgprs will nearly always be allocated for scratch/rings */ + if (ctx->options->supports_spill || user_sgpr_info->need_ring_offsets) { + user_sgpr_info->sgpr_count += 2; } - /* 1 for each descriptor set */ - for (unsigned i = 0; i < num_sets; ++i) { - if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) { - array_params_mask |= (1 << arg_idx); - arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024); + switch (ctx->stage) { + case MESA_SHADER_COMPUTE: + user_sgpr_info->sgpr_count += ctx->shader_info->info.cs.grid_components_used; + break; + case MESA_SHADER_FRAGMENT: + user_sgpr_info->sgpr_count += ctx->shader_info->info.ps.needs_sample_positions; + break; 
+ case MESA_SHADER_VERTEX: + if (!ctx->is_gs_copy_shader) { + user_sgpr_info->sgpr_count += ctx->shader_info->info.vs.has_vertex_buffers ? 2 : 0; + if (ctx->shader_info->info.vs.needs_draw_id) { + user_sgpr_info->sgpr_count += 3; + } else { + user_sgpr_info->sgpr_count += 2; + } } + if (ctx->options->key.vs.as_ls) + user_sgpr_info->sgpr_count++; + break; + case MESA_SHADER_TESS_CTRL: + user_sgpr_info->sgpr_count += 4; + break; + case MESA_SHADER_TESS_EVAL: + user_sgpr_info->sgpr_count += 1; + break; + case MESA_SHADER_GEOMETRY: + user_sgpr_info->sgpr_count += 2; + break; + default: + break; + } + + if (ctx->shader_info->info.needs_push_constants) + user_sgpr_info->sgpr_count += 2; + + uint32_t remaining_sgprs = 16 - user_sgpr_info->sgpr_count; + if (remaining_sgprs / 2 < util_bitcount(ctx->shader_info->info.desc_set_used_mask)) { + user_sgpr_info->sgpr_count += 2; + user_sgpr_info->indirect_all_descriptor_sets = true; + } else { + user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2; } +} + +static void create_function(struct nir_to_llvm_context *ctx) +{ + unsigned num_sets = ctx->options->layout ? 
ctx->options->layout->num_sets : 0; + uint8_t user_sgpr_idx; + struct user_sgpr_info user_sgpr_info; + struct arg_info args = {}; + LLVMValueRef desc_sets; + + allocate_user_sgprs(ctx, &user_sgpr_info); + if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) { + add_user_sgpr_argument(&args, const_array(ctx->v4i32, 16), &ctx->ring_offsets); /* address of rings */ + } + + /* 1 for each descriptor set */ + if (!user_sgpr_info.indirect_all_descriptor_sets) { + for (unsigned i = 0; i < num_sets; ++i) { + if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) { + add_user_sgpr_array_argument(&args, const_array(ctx->i8, 1024 * 1024), &ctx->descriptor_sets[i]); + } + } + } else + add_user_sgpr_array_argument(&args, const_array(const_array(ctx->i8, 1024 * 1024), 32), &desc_sets); - if (need_push_constants) { + if (ctx->shader_info->info.needs_push_constants) { /* 1 for push constants and dynamic descriptors */ - array_params_mask |= (1 << arg_idx); - arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024); + add_user_sgpr_array_argument(&args, const_array(ctx->i8, 1024 * 1024), &ctx->push_constants); } switch (ctx->stage) { case MESA_SHADER_COMPUTE: - arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); /* grid size */ - user_sgpr_count = arg_idx; - arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); - arg_types[arg_idx++] = ctx->i32; - sgpr_count = arg_idx; - - arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); + if (ctx->shader_info->info.cs.grid_components_used) + add_user_sgpr_argument(&args, LLVMVectorType(ctx->i32, ctx->shader_info->info.cs.grid_components_used), &ctx->num_work_groups); /* grid size */ + add_sgpr_argument(&args, LLVMVectorType(ctx->i32, 3), &ctx->workgroup_ids); + add_sgpr_argument(&args, ctx->i32, &ctx->tg_size); + add_vgpr_argument(&args, LLVMVectorType(ctx->i32, 3), &ctx->local_invocation_ids); break; case MESA_SHADER_VERTEX: if (!ctx->is_gs_copy_shader) { - arg_types[arg_idx++] = const_array(ctx->v16i8, 16); 
/* vertex buffers */ - arg_types[arg_idx++] = ctx->i32; // base vertex - arg_types[arg_idx++] = ctx->i32; // start instance - arg_types[arg_idx++] = ctx->i32; // draw index + if (ctx->shader_info->info.vs.has_vertex_buffers) + add_user_sgpr_argument(&args, const_array(ctx->v4i32, 16), &ctx->vertex_buffers); /* vertex buffers */ + add_user_sgpr_argument(&args, ctx->i32, &ctx->abi.base_vertex); // base vertex + add_user_sgpr_argument(&args, ctx->i32, &ctx->abi.start_instance);// start instance + if (ctx->shader_info->info.vs.needs_draw_id) + add_user_sgpr_argument(&args, ctx->i32, &ctx->abi.draw_id); // draw id } - user_sgpr_count = arg_idx; if (ctx->options->key.vs.as_es) - arg_types[arg_idx++] = ctx->i32; //es2gs offset - sgpr_count = arg_idx; - arg_types[arg_idx++] = ctx->i32; // vertex id + add_sgpr_argument(&args, ctx->i32, &ctx->es2gs_offset); // es2gs offset + else if (ctx->options->key.vs.as_ls) + add_user_sgpr_argument(&args, ctx->i32, &ctx->ls_out_layout); // ls out layout + add_vgpr_argument(&args, ctx->i32, &ctx->abi.vertex_id); // vertex id if (!ctx->is_gs_copy_shader) { - arg_types[arg_idx++] = ctx->i32; // rel auto id - arg_types[arg_idx++] = ctx->i32; // vs prim id - arg_types[arg_idx++] = ctx->i32; // instance id + add_vgpr_argument(&args, ctx->i32, &ctx->rel_auto_id); // rel auto id + add_vgpr_argument(&args, ctx->i32, &ctx->vs_prim_id); // vs prim id + add_vgpr_argument(&args, ctx->i32, &ctx->abi.instance_id); // instance id + } + break; + case MESA_SHADER_TESS_CTRL: + add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_offchip_layout); // tcs offchip layout + add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_offsets); // tcs out offsets + add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_layout); // tcs out layout + add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_in_layout); // tcs in layout + add_sgpr_argument(&args, ctx->i32, &ctx->oc_lds); // param oc lds + add_sgpr_argument(&args, ctx->i32, &ctx->tess_factor_offset); // tess factor 
offset + add_vgpr_argument(&args, ctx->i32, &ctx->tcs_patch_id); // patch id + add_vgpr_argument(&args, ctx->i32, &ctx->tcs_rel_ids); // rel ids; + break; + case MESA_SHADER_TESS_EVAL: + add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_offchip_layout); // tcs offchip layout + if (ctx->options->key.tes.as_es) { + add_sgpr_argument(&args, ctx->i32, &ctx->oc_lds); // OC LDS + add_sgpr_argument(&args, ctx->i32, NULL); // + add_sgpr_argument(&args, ctx->i32, &ctx->es2gs_offset); // es2gs offset + } else { + add_sgpr_argument(&args, ctx->i32, NULL); // + add_sgpr_argument(&args, ctx->i32, &ctx->oc_lds); // OC LDS } + add_vgpr_argument(&args, ctx->f32, &ctx->tes_u); // tes_u + add_vgpr_argument(&args, ctx->f32, &ctx->tes_v); // tes_v + add_vgpr_argument(&args, ctx->i32, &ctx->tes_rel_patch_id); // tes rel patch id + add_vgpr_argument(&args, ctx->i32, &ctx->tes_patch_id); // tes patch id break; case MESA_SHADER_GEOMETRY: - arg_types[arg_idx++] = ctx->i32; // gsvs stride - arg_types[arg_idx++] = ctx->i32; // gsvs num entires - user_sgpr_count = arg_idx; - arg_types[arg_idx++] = ctx->i32; // gs2vs offset - arg_types[arg_idx++] = ctx->i32; // wave id - sgpr_count = arg_idx; - arg_types[arg_idx++] = ctx->i32; // vtx0 - arg_types[arg_idx++] = ctx->i32; // vtx1 - arg_types[arg_idx++] = ctx->i32; // prim id - arg_types[arg_idx++] = ctx->i32; // vtx2 - arg_types[arg_idx++] = ctx->i32; // vtx3 - arg_types[arg_idx++] = ctx->i32; // vtx4 - arg_types[arg_idx++] = ctx->i32; // vtx5 - arg_types[arg_idx++] = ctx->i32; // GS instance id + add_user_sgpr_argument(&args, ctx->i32, &ctx->gsvs_ring_stride); // gsvs stride + add_user_sgpr_argument(&args, ctx->i32, &ctx->gsvs_num_entries); // gsvs num entires + add_sgpr_argument(&args, ctx->i32, &ctx->gs2vs_offset); // gs2vs offset + add_sgpr_argument(&args, ctx->i32, &ctx->gs_wave_id); // wave id + add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[0]); // vtx0 + add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[1]); // vtx1 + 
add_vgpr_argument(&args, ctx->i32, &ctx->gs_prim_id); // prim id + add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[2]); + add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[3]); + add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[4]); + add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[5]); + add_vgpr_argument(&args, ctx->i32, &ctx->gs_invocation_id); break; case MESA_SHADER_FRAGMENT: - arg_types[arg_idx++] = const_array(ctx->f32, 32); /* sample positions */ - user_sgpr_count = arg_idx; - arg_types[arg_idx++] = ctx->i32; /* prim mask */ - sgpr_count = arg_idx; - arg_types[arg_idx++] = ctx->v2i32; /* persp sample */ - arg_types[arg_idx++] = ctx->v2i32; /* persp center */ - arg_types[arg_idx++] = ctx->v2i32; /* persp centroid */ - arg_types[arg_idx++] = ctx->v3i32; /* persp pull model */ - arg_types[arg_idx++] = ctx->v2i32; /* linear sample */ - arg_types[arg_idx++] = ctx->v2i32; /* linear center */ - arg_types[arg_idx++] = ctx->v2i32; /* linear centroid */ - arg_types[arg_idx++] = ctx->f32; /* line stipple tex */ - arg_types[arg_idx++] = ctx->f32; /* pos x float */ - arg_types[arg_idx++] = ctx->f32; /* pos y float */ - arg_types[arg_idx++] = ctx->f32; /* pos z float */ - arg_types[arg_idx++] = ctx->f32; /* pos w float */ - arg_types[arg_idx++] = ctx->i32; /* front face */ - arg_types[arg_idx++] = ctx->i32; /* ancillary */ - arg_types[arg_idx++] = ctx->i32; /* sample coverage */ - arg_types[arg_idx++] = ctx->i32; /* fixed pt */ + if (ctx->shader_info->info.ps.needs_sample_positions) + add_user_sgpr_argument(&args, ctx->i32, &ctx->sample_pos_offset); /* sample position offset */ + add_sgpr_argument(&args, ctx->i32, &ctx->prim_mask); /* prim mask */ + add_vgpr_argument(&args, ctx->v2i32, &ctx->persp_sample); /* persp sample */ + add_vgpr_argument(&args, ctx->v2i32, &ctx->persp_center); /* persp center */ + add_vgpr_argument(&args, ctx->v2i32, &ctx->persp_centroid); /* persp centroid */ + add_vgpr_argument(&args, ctx->v3i32, NULL); /* persp pull 
model */ + add_vgpr_argument(&args, ctx->v2i32, &ctx->linear_sample); /* linear sample */ + add_vgpr_argument(&args, ctx->v2i32, &ctx->linear_center); /* linear center */ + add_vgpr_argument(&args, ctx->v2i32, &ctx->linear_centroid); /* linear centroid */ + add_vgpr_argument(&args, ctx->f32, NULL); /* line stipple tex */ + add_vgpr_argument(&args, ctx->f32, &ctx->frag_pos[0]); /* pos x float */ + add_vgpr_argument(&args, ctx->f32, &ctx->frag_pos[1]); /* pos y float */ + add_vgpr_argument(&args, ctx->f32, &ctx->frag_pos[2]); /* pos z float */ + add_vgpr_argument(&args, ctx->f32, &ctx->frag_pos[3]); /* pos w float */ + add_vgpr_argument(&args, ctx->i32, &ctx->front_face); /* front face */ + add_vgpr_argument(&args, ctx->i32, &ctx->ancillary); /* ancillary */ + add_vgpr_argument(&args, ctx->i32, &ctx->sample_coverage); /* sample coverage */ + add_vgpr_argument(&args, ctx->i32, NULL); /* fixed pt */ break; default: unreachable("Shader stage not implemented"); } ctx->main_function = create_llvm_function( - ctx->context, ctx->module, ctx->builder, NULL, 0, arg_types, - arg_idx, array_params_mask, sgpr_count, ctx->options->unsafe_math); + ctx->context, ctx->module, ctx->builder, NULL, 0, &args, + ctx->max_workgroup_size, + ctx->options->unsafe_math); set_llvm_calling_convention(ctx->main_function, ctx->stage); - ctx->shader_info->num_input_sgprs = 0; - ctx->shader_info->num_input_vgprs = 0; - ctx->shader_info->num_user_sgprs = ctx->options->supports_spill ? 2 : 0; - for (i = 0; i < user_sgpr_count; i++) - ctx->shader_info->num_user_sgprs += llvm_get_type_size(arg_types[i]) / 4; + ctx->shader_info->num_input_vgprs = 0; + ctx->shader_info->num_input_sgprs = ctx->shader_info->num_user_sgprs = + ctx->options->supports_spill ? 
2 : 0; - ctx->shader_info->num_input_sgprs = ctx->shader_info->num_user_sgprs; - for (; i < sgpr_count; i++) - ctx->shader_info->num_input_sgprs += llvm_get_type_size(arg_types[i]) / 4; + ctx->shader_info->num_user_sgprs += args.num_user_sgprs_used; + ctx->shader_info->num_input_sgprs += args.num_sgprs_used; if (ctx->stage != MESA_SHADER_FRAGMENT) - for (; i < arg_idx; ++i) - ctx->shader_info->num_input_vgprs += llvm_get_type_size(arg_types[i]) / 4; + ctx->shader_info->num_input_vgprs = args.num_vgprs_used; + + assign_arguments(ctx->main_function, &args); - arg_idx = 0; user_sgpr_idx = 0; - if (ctx->options->supports_spill || need_ring_offsets) { - set_userdata_location_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS, user_sgpr_idx, 2); - user_sgpr_idx += 2; + if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets) { + set_userdata_location_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx, 2); if (ctx->options->supports_spill) { ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr", LLVMPointerType(ctx->i8, CONST_ADDR_SPACE), NULL, 0, AC_FUNC_ATTR_READNONE); ctx->ring_offsets = LLVMBuildBitCast(ctx->builder, ctx->ring_offsets, - const_array(ctx->v16i8, 8), ""); - } else - ctx->ring_offsets = LLVMGetParam(ctx->main_function, arg_idx++); + const_array(ctx->v4i32, 16), ""); + } } - for (unsigned i = 0; i < num_sets; ++i) { - if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) { - set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], user_sgpr_idx, 2); - user_sgpr_idx += 2; - ctx->descriptor_sets[i] = - LLVMGetParam(ctx->main_function, arg_idx++); - } else - ctx->descriptor_sets[i] = NULL; + if (!user_sgpr_info.indirect_all_descriptor_sets) { + for (unsigned i = 0; i < num_sets; ++i) { + if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) { + set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], &user_sgpr_idx, 2); + } else + 
ctx->descriptor_sets[i] = NULL; + } + } else { + uint32_t desc_sgpr_idx = user_sgpr_idx; + set_userdata_location_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS, &user_sgpr_idx, 2); + + for (unsigned i = 0; i < num_sets; ++i) { + if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) { + set_userdata_location_indirect(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], desc_sgpr_idx, 2, i * 8); + ctx->descriptor_sets[i] = ac_build_indexed_load_const(&ctx->ac, desc_sets, LLVMConstInt(ctx->i32, i, false)); + + } else + ctx->descriptor_sets[i] = NULL; + } + ctx->shader_info->need_indirect_descriptor_sets = true; } - if (need_push_constants) { - ctx->push_constants = LLVMGetParam(ctx->main_function, arg_idx++); - set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2); - user_sgpr_idx += 2; + if (ctx->shader_info->info.needs_push_constants) { + set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, &user_sgpr_idx, 2); } switch (ctx->stage) { case MESA_SHADER_COMPUTE: - set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, user_sgpr_idx, 3); - user_sgpr_idx += 3; - ctx->num_work_groups = - LLVMGetParam(ctx->main_function, arg_idx++); - ctx->workgroup_ids = - LLVMGetParam(ctx->main_function, arg_idx++); - ctx->tg_size = - LLVMGetParam(ctx->main_function, arg_idx++); - ctx->local_invocation_ids = - LLVMGetParam(ctx->main_function, arg_idx++); + if (ctx->shader_info->info.cs.grid_components_used) { + set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, &user_sgpr_idx, ctx->shader_info->info.cs.grid_components_used); + } break; case MESA_SHADER_VERTEX: if (!ctx->is_gs_copy_shader) { - set_userdata_location_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx, 2); - user_sgpr_idx += 2; - ctx->vertex_buffers = LLVMGetParam(ctx->main_function, arg_idx++); - set_userdata_location_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, 3); - user_sgpr_idx += 3; - ctx->base_vertex = LLVMGetParam(ctx->main_function, arg_idx++); - 
ctx->start_instance = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->draw_index = LLVMGetParam(ctx->main_function, arg_idx++); + if (ctx->shader_info->info.vs.has_vertex_buffers) { + set_userdata_location_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, &user_sgpr_idx, 2); + } + unsigned vs_num = 2; + if (ctx->shader_info->info.vs.needs_draw_id) + vs_num++; + + set_userdata_location_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, &user_sgpr_idx, vs_num); } - if (ctx->options->key.vs.as_es) - ctx->es2gs_offset = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->vertex_id = LLVMGetParam(ctx->main_function, arg_idx++); - if (!ctx->is_gs_copy_shader) { - ctx->rel_auto_id = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->vs_prim_id = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->instance_id = LLVMGetParam(ctx->main_function, arg_idx++); + if (ctx->options->key.vs.as_ls) { + set_userdata_location_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, &user_sgpr_idx, 1); } + if (ctx->options->key.vs.as_ls) + declare_tess_lds(ctx); + break; + case MESA_SHADER_TESS_CTRL: + set_userdata_location_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, &user_sgpr_idx, 4); + declare_tess_lds(ctx); + break; + case MESA_SHADER_TESS_EVAL: + set_userdata_location_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1); break; case MESA_SHADER_GEOMETRY: - set_userdata_location_shader(ctx, AC_UD_GS_VS_RING_STRIDE_ENTRIES, user_sgpr_idx, 2); - user_sgpr_idx += 2; - ctx->gsvs_ring_stride = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->gsvs_num_entries = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->gs2vs_offset = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->gs_wave_id = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->gs_vtx_offset[0] = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->gs_vtx_offset[1] = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->gs_prim_id = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->gs_vtx_offset[2] = LLVMGetParam(ctx->main_function, arg_idx++); - 
ctx->gs_vtx_offset[3] = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->gs_vtx_offset[4] = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->gs_vtx_offset[5] = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->gs_invocation_id = LLVMGetParam(ctx->main_function, arg_idx++); + set_userdata_location_shader(ctx, AC_UD_GS_VS_RING_STRIDE_ENTRIES, &user_sgpr_idx, 2); break; case MESA_SHADER_FRAGMENT: - set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS, user_sgpr_idx, 2); - user_sgpr_idx += 2; - ctx->sample_positions = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->prim_mask = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->persp_sample = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->persp_center = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->persp_centroid = LLVMGetParam(ctx->main_function, arg_idx++); - arg_idx++; - ctx->linear_sample = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->linear_center = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->linear_centroid = LLVMGetParam(ctx->main_function, arg_idx++); - arg_idx++; /* line stipple */ - ctx->frag_pos[0] = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->frag_pos[1] = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->frag_pos[2] = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->frag_pos[3] = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->front_face = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->ancillary = LLVMGetParam(ctx->main_function, arg_idx++); - ctx->sample_coverage = LLVMGetParam(ctx->main_function, arg_idx++); + if (ctx->shader_info->info.ps.needs_sample_positions) { + set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS_OFFSET, &user_sgpr_idx, 1); + } break; default: unreachable("Shader stage not implemented"); @@ -681,7 +945,6 @@ static void setup_types(struct nir_to_llvm_context *ctx) ctx->f64 = LLVMDoubleTypeInContext(ctx->context); ctx->v2f32 = LLVMVectorType(ctx->f32, 2); ctx->v4f32 = LLVMVectorType(ctx->f32, 4); - ctx->v16i8 = 
LLVMVectorType(ctx->i8, 16); ctx->i1false = LLVMConstInt(ctx->i1, 0, false); ctx->i1true = LLVMConstInt(ctx->i1, 1, false); @@ -712,7 +975,7 @@ static int get_llvm_num_components(LLVMValueRef value) return num_components; } -static LLVMValueRef llvm_extract_elem(struct nir_to_llvm_context *ctx, +static LLVMValueRef llvm_extract_elem(struct ac_llvm_context *ac, LLVMValueRef value, int index) { @@ -722,11 +985,11 @@ static LLVMValueRef llvm_extract_elem(struct nir_to_llvm_context *ctx, if (count == 1) return value; - return LLVMBuildExtractElement(ctx->builder, value, - LLVMConstInt(ctx->i32, index, false), ""); + return LLVMBuildExtractElement(ac->builder, value, + LLVMConstInt(ac->i32, index, false), ""); } -static LLVMValueRef trim_vector(struct nir_to_llvm_context *ctx, +static LLVMValueRef trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned count) { unsigned num_components = get_llvm_num_components(value); @@ -746,13 +1009,13 @@ static LLVMValueRef trim_vector(struct nir_to_llvm_context *ctx, } static void -build_store_values_extended(struct nir_to_llvm_context *ctx, +build_store_values_extended(struct ac_llvm_context *ac, LLVMValueRef *values, unsigned value_count, unsigned value_stride, LLVMValueRef vec) { - LLVMBuilderRef builder = ctx->builder; + LLVMBuilderRef builder = ac->builder; unsigned i; if (value_count == 1) { @@ -762,38 +1025,38 @@ build_store_values_extended(struct nir_to_llvm_context *ctx, for (i = 0; i < value_count; i++) { LLVMValueRef ptr = values[i * value_stride]; - LLVMValueRef index = LLVMConstInt(ctx->i32, i, false); + LLVMValueRef index = LLVMConstInt(ac->i32, i, false); LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, ""); LLVMBuildStore(builder, value, ptr); } } -static LLVMTypeRef get_def_type(struct nir_to_llvm_context *ctx, - nir_ssa_def *def) +static LLVMTypeRef get_def_type(struct ac_nir_context *ctx, + const nir_ssa_def *def) { - LLVMTypeRef type = LLVMIntTypeInContext(ctx->context, 
def->bit_size); + LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size); if (def->num_components > 1) { type = LLVMVectorType(type, def->num_components); } return type; } -static LLVMValueRef get_src(struct nir_to_llvm_context *ctx, nir_src src) +static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src) { assert(src.is_ssa); - struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, src.ssa); + struct hash_entry *entry = _mesa_hash_table_search(nir->defs, src.ssa); return (LLVMValueRef)entry->data; } -static LLVMBasicBlockRef get_block(struct nir_to_llvm_context *ctx, - struct nir_block *b) +static LLVMBasicBlockRef get_block(struct ac_nir_context *nir, + const struct nir_block *b) { - struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, b); + struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b); return (LLVMBasicBlockRef)entry->data; } -static LLVMValueRef get_alu_src(struct nir_to_llvm_context *ctx, +static LLVMValueRef get_alu_src(struct ac_nir_context *ctx, nir_alu_src src, unsigned num_components) { @@ -814,20 +1077,20 @@ static LLVMValueRef get_alu_src(struct nir_to_llvm_context *ctx, if (need_swizzle || num_components != src_components) { LLVMValueRef masks[] = { - LLVMConstInt(ctx->i32, src.swizzle[0], false), - LLVMConstInt(ctx->i32, src.swizzle[1], false), - LLVMConstInt(ctx->i32, src.swizzle[2], false), - LLVMConstInt(ctx->i32, src.swizzle[3], false)}; + LLVMConstInt(ctx->ac.i32, src.swizzle[0], false), + LLVMConstInt(ctx->ac.i32, src.swizzle[1], false), + LLVMConstInt(ctx->ac.i32, src.swizzle[2], false), + LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)}; if (src_components > 1 && num_components == 1) { - value = LLVMBuildExtractElement(ctx->builder, value, + value = LLVMBuildExtractElement(ctx->ac.builder, value, masks[0], ""); } else if (src_components == 1 && num_components > 1) { LLVMValueRef values[] = {value, value, value, value}; value = ac_build_gather_values(&ctx->ac, values, 
num_components); } else { LLVMValueRef swizzle = LLVMConstVector(masks, num_components); - value = LLVMBuildShuffleVector(ctx->builder, value, value, + value = LLVMBuildShuffleVector(ctx->ac.builder, value, value, swizzle, ""); } } @@ -836,7 +1099,7 @@ static LLVMValueRef get_alu_src(struct nir_to_llvm_context *ctx, return value; } -static LLVMValueRef emit_int_cmp(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx, LLVMIntPredicate pred, LLVMValueRef src0, LLVMValueRef src1) { @@ -846,7 +1109,7 @@ static LLVMValueRef emit_int_cmp(struct nir_to_llvm_context *ctx, LLVMConstInt(ctx->i32, 0, false), ""); } -static LLVMValueRef emit_float_cmp(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx, LLVMRealPredicate pred, LLVMValueRef src0, LLVMValueRef src1) { @@ -859,7 +1122,7 @@ static LLVMValueRef emit_float_cmp(struct nir_to_llvm_context *ctx, LLVMConstInt(ctx->i32, 0, false), ""); } -static LLVMValueRef emit_intrin_1f_param(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx, const char *intrin, LLVMTypeRef result_type, LLVMValueRef src0) @@ -869,11 +1132,13 @@ static LLVMValueRef emit_intrin_1f_param(struct nir_to_llvm_context *ctx, to_float(ctx, src0), }; - sprintf(name, "%s.f%d", intrin, get_elem_bits(ctx, result_type)); - return ac_build_intrinsic(&ctx->ac, name, result_type, params, 1, AC_FUNC_ATTR_READNONE); + MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin, + get_elem_bits(ctx, result_type)); + assert(length < sizeof(name)); + return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE); } -static LLVMValueRef emit_intrin_2f_param(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx, const char *intrin, LLVMTypeRef result_type, LLVMValueRef src0, LLVMValueRef src1) @@ -884,11 +1149,13 @@ static LLVMValueRef 
emit_intrin_2f_param(struct nir_to_llvm_context *ctx, to_float(ctx, src1), }; - sprintf(name, "%s.f%d", intrin, get_elem_bits(ctx, result_type)); - return ac_build_intrinsic(&ctx->ac, name, result_type, params, 2, AC_FUNC_ATTR_READNONE); + MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin, + get_elem_bits(ctx, result_type)); + assert(length < sizeof(name)); + return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE); } -static LLVMValueRef emit_intrin_3f_param(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx, const char *intrin, LLVMTypeRef result_type, LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2) @@ -900,19 +1167,21 @@ static LLVMValueRef emit_intrin_3f_param(struct nir_to_llvm_context *ctx, to_float(ctx, src2), }; - sprintf(name, "%s.f%d", intrin, get_elem_bits(ctx, result_type)); - return ac_build_intrinsic(&ctx->ac, name, result_type, params, 3, AC_FUNC_ATTR_READNONE); + MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin, + get_elem_bits(ctx, result_type)); + assert(length < sizeof(name)); + return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE); } -static LLVMValueRef emit_bcsel(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2) { LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, - ctx->i32zero, ""); + ctx->i32_0, ""); return LLVMBuildSelect(ctx->builder, v, src1, src2, ""); } -static LLVMValueRef emit_find_lsb(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_find_lsb(struct ac_llvm_context *ctx, LLVMValueRef src0) { LLVMValueRef params[2] = { @@ -925,24 +1194,24 @@ static LLVMValueRef emit_find_lsb(struct nir_to_llvm_context *ctx, * * The hardware already implements the correct behavior. 
*/ - LLVMConstInt(ctx->i32, 1, false), + LLVMConstInt(ctx->i1, 1, false), }; - return ac_build_intrinsic(&ctx->ac, "llvm.cttz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE); + return ac_build_intrinsic(ctx, "llvm.cttz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE); } -static LLVMValueRef emit_ifind_msb(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_ifind_msb(struct ac_llvm_context *ctx, LLVMValueRef src0) { - return ac_build_imsb(&ctx->ac, src0, ctx->i32); + return ac_build_imsb(ctx, src0, ctx->i32); } -static LLVMValueRef emit_ufind_msb(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_ufind_msb(struct ac_llvm_context *ctx, LLVMValueRef src0) { - return ac_build_umsb(&ctx->ac, src0, ctx->i32); + return ac_build_umsb(ctx, src0, ctx->i32); } -static LLVMValueRef emit_minmax_int(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_minmax_int(struct ac_llvm_context *ctx, LLVMIntPredicate pred, LLVMValueRef src0, LLVMValueRef src1) { @@ -952,38 +1221,38 @@ static LLVMValueRef emit_minmax_int(struct nir_to_llvm_context *ctx, src1, ""); } -static LLVMValueRef emit_iabs(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx, LLVMValueRef src0) { return emit_minmax_int(ctx, LLVMIntSGT, src0, LLVMBuildNeg(ctx->builder, src0, "")); } -static LLVMValueRef emit_fsign(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0) { LLVMValueRef cmp, val; - cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, ctx->f32zero, ""); - val = LLVMBuildSelect(ctx->builder, cmp, ctx->f32one, src0, ""); - cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, ctx->f32zero, ""); + cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, ctx->f32_0, ""); + val = LLVMBuildSelect(ctx->builder, cmp, ctx->f32_1, src0, ""); + cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, ctx->f32_0, ""); val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(ctx->f32, -1.0), ""); return 
val; } -static LLVMValueRef emit_isign(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_isign(struct ac_llvm_context *ctx, LLVMValueRef src0) { LLVMValueRef cmp, val; - cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, ctx->i32zero, ""); - val = LLVMBuildSelect(ctx->builder, cmp, ctx->i32one, src0, ""); - cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, ctx->i32zero, ""); + cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, ctx->i32_0, ""); + val = LLVMBuildSelect(ctx->builder, cmp, ctx->i32_1, src0, ""); + cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, ctx->i32_0, ""); val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(ctx->i32, -1, true), ""); return val; } -static LLVMValueRef emit_ffract(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_ffract(struct ac_llvm_context *ctx, LLVMValueRef src0) { const char *intr = "llvm.floor.f32"; @@ -991,13 +1260,13 @@ static LLVMValueRef emit_ffract(struct nir_to_llvm_context *ctx, LLVMValueRef params[] = { fsrc0, }; - LLVMValueRef floor = ac_build_intrinsic(&ctx->ac, intr, + LLVMValueRef floor = ac_build_intrinsic(ctx, intr, ctx->f32, params, 1, AC_FUNC_ATTR_READNONE); return LLVMBuildFSub(ctx->builder, fsrc0, floor, ""); } -static LLVMValueRef emit_uint_carry(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx, const char *intrin, LLVMValueRef src0, LLVMValueRef src1) { @@ -1008,7 +1277,7 @@ static LLVMValueRef emit_uint_carry(struct nir_to_llvm_context *ctx, ret_type = LLVMStructTypeInContext(ctx->context, types, 2, true); - res = ac_build_intrinsic(&ctx->ac, intrin, ret_type, + res = ac_build_intrinsic(ctx, intrin, ret_type, params, 2, AC_FUNC_ATTR_READNONE); res = LLVMBuildExtractValue(ctx->builder, res, 1, ""); @@ -1016,13 +1285,63 @@ static LLVMValueRef emit_uint_carry(struct nir_to_llvm_context *ctx, return res; } -static LLVMValueRef emit_b2f(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx, 
LLVMValueRef src0) { return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), ""); } -static LLVMValueRef emit_umul_high(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx, + LLVMValueRef src0) +{ + src0 = to_float(ctx, src0); + return LLVMBuildSExt(ctx->builder, + LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, ctx->f32_0, ""), + ctx->i32, ""); +} + +static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx, + LLVMValueRef src0) +{ + return LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, ""); +} + +static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx, + LLVMValueRef src0) +{ + return LLVMBuildSExt(ctx->builder, + LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, ctx->i32_0, ""), + ctx->i32, ""); +} + +static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx, + LLVMValueRef src0) +{ + LLVMValueRef result; + LLVMValueRef cond; + + src0 = to_float(&ctx->ac, src0); + result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, ""); + + /* TODO SI/CIK options here */ + if (ctx->options->chip_class >= VI) { + LLVMValueRef args[2]; + /* Check if the result is a denormal - and flush to 0 if so. 
*/ + args[0] = result; + args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false); + cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE); + } + + /* need to convert back up to f32 */ + result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, ""); + + if (ctx->options->chip_class >= VI) + result = LLVMBuildSelect(ctx->builder, cond, ctx->f32zero, result, ""); + + return result; +} + +static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef src1) { LLVMValueRef dst64, result; @@ -1035,7 +1354,7 @@ static LLVMValueRef emit_umul_high(struct nir_to_llvm_context *ctx, return result; } -static LLVMValueRef emit_imul_high(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef src1) { LLVMValueRef dst64, result; @@ -1048,19 +1367,19 @@ static LLVMValueRef emit_imul_high(struct nir_to_llvm_context *ctx, return result; } -static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx, bool is_signed, - LLVMValueRef srcs[3]) + const LLVMValueRef srcs[3]) { LLVMValueRef result; LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), ""); - result = ac_build_bfe(&ctx->ac, srcs[0], srcs[1], srcs[2], is_signed); + result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed); result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, ""); return result; } -static LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_bitfield_insert(struct ac_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2, LLVMValueRef src3) { @@ -1069,9 +1388,9 @@ static LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context *ctx, bfi_args[0] = LLVMBuildShl(ctx->builder, LLVMBuildSub(ctx->builder, LLVMBuildShl(ctx->builder, - 
ctx->i32one, + ctx->i32_1, src3, ""), - ctx->i32one, ""), + ctx->i32_1, ""), src2, ""); bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, ""); bfi_args[2] = src0; @@ -1090,7 +1409,7 @@ static LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context *ctx, return result; } -static LLVMValueRef emit_pack_half_2x16(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx, LLVMValueRef src0) { LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false); @@ -1098,8 +1417,8 @@ static LLVMValueRef emit_pack_half_2x16(struct nir_to_llvm_context *ctx, LLVMValueRef comp[2]; src0 = to_float(ctx, src0); - comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, ""); - comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, ""); + comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, ""); + comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, ""); for (i = 0; i < 2; i++) { comp[i] = LLVMBuildFPTrunc(ctx->builder, comp[i], ctx->f16, ""); comp[i] = LLVMBuildBitCast(ctx->builder, comp[i], ctx->i16, ""); @@ -1112,7 +1431,7 @@ static LLVMValueRef emit_pack_half_2x16(struct nir_to_llvm_context *ctx, return comp[0]; } -static LLVMValueRef emit_unpack_half_2x16(struct nir_to_llvm_context *ctx, +static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx, LLVMValueRef src0) { LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false); @@ -1126,10 +1445,11 @@ static LLVMValueRef emit_unpack_half_2x16(struct nir_to_llvm_context *ctx, temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, ""); } - result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0], - ctx->i32zero, ""); + LLVMTypeRef v2f32 = LLVMVectorType(ctx->f32, 2); + result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(v2f32), temps[0], + ctx->i32_0, ""); result = LLVMBuildInsertElement(ctx->builder, result, temps[1], - ctx->i32one, ""); + ctx->i32_1, ""); return result; } @@ -1140,7 +1460,6 @@ 
static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx, unsigned mask; int idx; LLVMValueRef result; - ctx->has_ddxy = true; if (!ctx->lds && !ctx->has_ds_bpermute) ctx->lds = LLVMAddGlobalInAddressSpace(ctx->module, @@ -1189,7 +1508,7 @@ static LLVMValueRef emit_ddxy_interp( return ac_build_gather_values(&ctx->ac, result, 4); } -static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr) +static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) { LLVMValueRef src[4], result = NULL; unsigned num_components = instr->dest.dest.ssa.num_components; @@ -1222,309 +1541,332 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr) result = src[0]; break; case nir_op_fneg: - src[0] = to_float(ctx, src[0]); - result = LLVMBuildFNeg(ctx->builder, src[0], ""); + src[0] = to_float(&ctx->ac, src[0]); + result = LLVMBuildFNeg(ctx->ac.builder, src[0], ""); break; case nir_op_ineg: - result = LLVMBuildNeg(ctx->builder, src[0], ""); + result = LLVMBuildNeg(ctx->ac.builder, src[0], ""); break; case nir_op_inot: - result = LLVMBuildNot(ctx->builder, src[0], ""); + result = LLVMBuildNot(ctx->ac.builder, src[0], ""); break; case nir_op_iadd: - result = LLVMBuildAdd(ctx->builder, src[0], src[1], ""); + result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_fadd: - src[0] = to_float(ctx, src[0]); - src[1] = to_float(ctx, src[1]); - result = LLVMBuildFAdd(ctx->builder, src[0], src[1], ""); + src[0] = to_float(&ctx->ac, src[0]); + src[1] = to_float(&ctx->ac, src[1]); + result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_fsub: - src[0] = to_float(ctx, src[0]); - src[1] = to_float(ctx, src[1]); - result = LLVMBuildFSub(ctx->builder, src[0], src[1], ""); + src[0] = to_float(&ctx->ac, src[0]); + src[1] = to_float(&ctx->ac, src[1]); + result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_isub: - result = LLVMBuildSub(ctx->builder, src[0], src[1], ""); 
+ result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_imul: - result = LLVMBuildMul(ctx->builder, src[0], src[1], ""); + result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_imod: - result = LLVMBuildSRem(ctx->builder, src[0], src[1], ""); + result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_umod: - result = LLVMBuildURem(ctx->builder, src[0], src[1], ""); + result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_fmod: - src[0] = to_float(ctx, src[0]); - src[1] = to_float(ctx, src[1]); + src[0] = to_float(&ctx->ac, src[0]); + src[1] = to_float(&ctx->ac, src[1]); result = ac_build_fdiv(&ctx->ac, src[0], src[1]); - result = emit_intrin_1f_param(ctx, "llvm.floor", - to_float_type(ctx, def_type), result); - result = LLVMBuildFMul(ctx->builder, src[1] , result, ""); - result = LLVMBuildFSub(ctx->builder, src[0], result, ""); + result = emit_intrin_1f_param(&ctx->ac, "llvm.floor", + to_float_type(&ctx->ac, def_type), result); + result = LLVMBuildFMul(ctx->ac.builder, src[1] , result, ""); + result = LLVMBuildFSub(ctx->ac.builder, src[0], result, ""); break; case nir_op_frem: - src[0] = to_float(ctx, src[0]); - src[1] = to_float(ctx, src[1]); - result = LLVMBuildFRem(ctx->builder, src[0], src[1], ""); + src[0] = to_float(&ctx->ac, src[0]); + src[1] = to_float(&ctx->ac, src[1]); + result = LLVMBuildFRem(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_irem: - result = LLVMBuildSRem(ctx->builder, src[0], src[1], ""); + result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_idiv: - result = LLVMBuildSDiv(ctx->builder, src[0], src[1], ""); + result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_udiv: - result = LLVMBuildUDiv(ctx->builder, src[0], src[1], ""); + result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_fmul: - src[0] = to_float(ctx, src[0]); - src[1] = to_float(ctx, src[1]); - 
result = LLVMBuildFMul(ctx->builder, src[0], src[1], ""); + src[0] = to_float(&ctx->ac, src[0]); + src[1] = to_float(&ctx->ac, src[1]); + result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_fdiv: - src[0] = to_float(ctx, src[0]); - src[1] = to_float(ctx, src[1]); + src[0] = to_float(&ctx->ac, src[0]); + src[1] = to_float(&ctx->ac, src[1]); result = ac_build_fdiv(&ctx->ac, src[0], src[1]); break; case nir_op_frcp: - src[0] = to_float(ctx, src[0]); - result = ac_build_fdiv(&ctx->ac, ctx->f32one, src[0]); + src[0] = to_float(&ctx->ac, src[0]); + result = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, src[0]); break; case nir_op_iand: - result = LLVMBuildAnd(ctx->builder, src[0], src[1], ""); + result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_ior: - result = LLVMBuildOr(ctx->builder, src[0], src[1], ""); + result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_ixor: - result = LLVMBuildXor(ctx->builder, src[0], src[1], ""); + result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], ""); break; case nir_op_ishl: - result = LLVMBuildShl(ctx->builder, src[0], src[1], ""); + result = LLVMBuildShl(ctx->ac.builder, src[0], + LLVMBuildZExt(ctx->ac.builder, src[1], + LLVMTypeOf(src[0]), ""), + ""); break; case nir_op_ishr: - result = LLVMBuildAShr(ctx->builder, src[0], src[1], ""); + result = LLVMBuildAShr(ctx->ac.builder, src[0], + LLVMBuildZExt(ctx->ac.builder, src[1], + LLVMTypeOf(src[0]), ""), + ""); break; case nir_op_ushr: - result = LLVMBuildLShr(ctx->builder, src[0], src[1], ""); + result = LLVMBuildLShr(ctx->ac.builder, src[0], + LLVMBuildZExt(ctx->ac.builder, src[1], + LLVMTypeOf(src[0]), ""), + ""); break; case nir_op_ilt: - result = emit_int_cmp(ctx, LLVMIntSLT, src[0], src[1]); + result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]); break; case nir_op_ine: - result = emit_int_cmp(ctx, LLVMIntNE, src[0], src[1]); + result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]); break; case 
nir_op_ieq: - result = emit_int_cmp(ctx, LLVMIntEQ, src[0], src[1]); + result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]); break; case nir_op_ige: - result = emit_int_cmp(ctx, LLVMIntSGE, src[0], src[1]); + result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]); break; case nir_op_ult: - result = emit_int_cmp(ctx, LLVMIntULT, src[0], src[1]); + result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]); break; case nir_op_uge: - result = emit_int_cmp(ctx, LLVMIntUGE, src[0], src[1]); + result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]); break; case nir_op_feq: - result = emit_float_cmp(ctx, LLVMRealUEQ, src[0], src[1]); + result = emit_float_cmp(&ctx->ac, LLVMRealUEQ, src[0], src[1]); break; case nir_op_fne: - result = emit_float_cmp(ctx, LLVMRealUNE, src[0], src[1]); + result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]); break; case nir_op_flt: - result = emit_float_cmp(ctx, LLVMRealULT, src[0], src[1]); + result = emit_float_cmp(&ctx->ac, LLVMRealULT, src[0], src[1]); break; case nir_op_fge: - result = emit_float_cmp(ctx, LLVMRealUGE, src[0], src[1]); + result = emit_float_cmp(&ctx->ac, LLVMRealUGE, src[0], src[1]); break; case nir_op_fabs: - result = emit_intrin_1f_param(ctx, "llvm.fabs", - to_float_type(ctx, def_type), src[0]); + result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs", + to_float_type(&ctx->ac, def_type), src[0]); break; case nir_op_iabs: - result = emit_iabs(ctx, src[0]); + result = emit_iabs(&ctx->ac, src[0]); break; case nir_op_imax: - result = emit_minmax_int(ctx, LLVMIntSGT, src[0], src[1]); + result = emit_minmax_int(&ctx->ac, LLVMIntSGT, src[0], src[1]); break; case nir_op_imin: - result = emit_minmax_int(ctx, LLVMIntSLT, src[0], src[1]); + result = emit_minmax_int(&ctx->ac, LLVMIntSLT, src[0], src[1]); break; case nir_op_umax: - result = emit_minmax_int(ctx, LLVMIntUGT, src[0], src[1]); + result = emit_minmax_int(&ctx->ac, LLVMIntUGT, src[0], src[1]); break; case nir_op_umin: - result = emit_minmax_int(ctx, 
LLVMIntULT, src[0], src[1]); + result = emit_minmax_int(&ctx->ac, LLVMIntULT, src[0], src[1]); break; case nir_op_isign: - result = emit_isign(ctx, src[0]); + result = emit_isign(&ctx->ac, src[0]); break; case nir_op_fsign: - src[0] = to_float(ctx, src[0]); - result = emit_fsign(ctx, src[0]); + src[0] = to_float(&ctx->ac, src[0]); + result = emit_fsign(&ctx->ac, src[0]); break; case nir_op_ffloor: - result = emit_intrin_1f_param(ctx, "llvm.floor", - to_float_type(ctx, def_type), src[0]); + result = emit_intrin_1f_param(&ctx->ac, "llvm.floor", + to_float_type(&ctx->ac, def_type), src[0]); break; case nir_op_ftrunc: - result = emit_intrin_1f_param(ctx, "llvm.trunc", - to_float_type(ctx, def_type), src[0]); + result = emit_intrin_1f_param(&ctx->ac, "llvm.trunc", + to_float_type(&ctx->ac, def_type), src[0]); break; case nir_op_fceil: - result = emit_intrin_1f_param(ctx, "llvm.ceil", - to_float_type(ctx, def_type), src[0]); + result = emit_intrin_1f_param(&ctx->ac, "llvm.ceil", + to_float_type(&ctx->ac, def_type), src[0]); break; case nir_op_fround_even: - result = emit_intrin_1f_param(ctx, "llvm.rint", - to_float_type(ctx, def_type),src[0]); + result = emit_intrin_1f_param(&ctx->ac, "llvm.rint", + to_float_type(&ctx->ac, def_type),src[0]); break; case nir_op_ffract: - result = emit_ffract(ctx, src[0]); + result = emit_ffract(&ctx->ac, src[0]); break; case nir_op_fsin: - result = emit_intrin_1f_param(ctx, "llvm.sin", - to_float_type(ctx, def_type), src[0]); + result = emit_intrin_1f_param(&ctx->ac, "llvm.sin", + to_float_type(&ctx->ac, def_type), src[0]); break; case nir_op_fcos: - result = emit_intrin_1f_param(ctx, "llvm.cos", - to_float_type(ctx, def_type), src[0]); + result = emit_intrin_1f_param(&ctx->ac, "llvm.cos", + to_float_type(&ctx->ac, def_type), src[0]); break; case nir_op_fsqrt: - result = emit_intrin_1f_param(ctx, "llvm.sqrt", - to_float_type(ctx, def_type), src[0]); + result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", + to_float_type(&ctx->ac, 
def_type), src[0]); break; case nir_op_fexp2: - result = emit_intrin_1f_param(ctx, "llvm.exp2", - to_float_type(ctx, def_type), src[0]); + result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2", + to_float_type(&ctx->ac, def_type), src[0]); break; case nir_op_flog2: - result = emit_intrin_1f_param(ctx, "llvm.log2", - to_float_type(ctx, def_type), src[0]); + result = emit_intrin_1f_param(&ctx->ac, "llvm.log2", + to_float_type(&ctx->ac, def_type), src[0]); break; case nir_op_frsq: - result = emit_intrin_1f_param(ctx, "llvm.sqrt", - to_float_type(ctx, def_type), src[0]); - result = ac_build_fdiv(&ctx->ac, ctx->f32one, result); + result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", + to_float_type(&ctx->ac, def_type), src[0]); + result = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, result); break; case nir_op_fpow: - result = emit_intrin_2f_param(ctx, "llvm.pow", - to_float_type(ctx, def_type), src[0], src[1]); + result = emit_intrin_2f_param(&ctx->ac, "llvm.pow", + to_float_type(&ctx->ac, def_type), src[0], src[1]); break; case nir_op_fmax: - result = emit_intrin_2f_param(ctx, "llvm.maxnum", - to_float_type(ctx, def_type), src[0], src[1]); + result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", + to_float_type(&ctx->ac, def_type), src[0], src[1]); + if (instr->dest.dest.ssa.bit_size == 32) + result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize", + to_float_type(&ctx->ac, def_type), + result); break; case nir_op_fmin: - result = emit_intrin_2f_param(ctx, "llvm.minnum", - to_float_type(ctx, def_type), src[0], src[1]); + result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum", + to_float_type(&ctx->ac, def_type), src[0], src[1]); + if (instr->dest.dest.ssa.bit_size == 32) + result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize", + to_float_type(&ctx->ac, def_type), + result); break; case nir_op_ffma: - result = emit_intrin_3f_param(ctx, "llvm.fma", - to_float_type(ctx, def_type), src[0], src[1], src[2]); + result = emit_intrin_3f_param(&ctx->ac, "llvm.fma", + 
to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]); break; case nir_op_ibitfield_extract: - result = emit_bitfield_extract(ctx, true, src); + result = emit_bitfield_extract(&ctx->ac, true, src); break; case nir_op_ubitfield_extract: - result = emit_bitfield_extract(ctx, false, src); + result = emit_bitfield_extract(&ctx->ac, false, src); break; case nir_op_bitfield_insert: - result = emit_bitfield_insert(ctx, src[0], src[1], src[2], src[3]); + result = emit_bitfield_insert(&ctx->ac, src[0], src[1], src[2], src[3]); break; case nir_op_bitfield_reverse: - result = ac_build_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE); + result = ac_build_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE); break; case nir_op_bit_count: - result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE); + result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE); break; case nir_op_vec2: case nir_op_vec3: case nir_op_vec4: for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) - src[i] = to_integer(ctx, src[i]); + src[i] = to_integer(&ctx->ac, src[i]); result = ac_build_gather_values(&ctx->ac, src, num_components); break; case nir_op_f2i32: case nir_op_f2i64: - src[0] = to_float(ctx, src[0]); - result = LLVMBuildFPToSI(ctx->builder, src[0], def_type, ""); + src[0] = to_float(&ctx->ac, src[0]); + result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, ""); break; case nir_op_f2u32: case nir_op_f2u64: - src[0] = to_float(ctx, src[0]); - result = LLVMBuildFPToUI(ctx->builder, src[0], def_type, ""); + src[0] = to_float(&ctx->ac, src[0]); + result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, ""); break; case nir_op_i2f32: case nir_op_i2f64: - result = LLVMBuildSIToFP(ctx->builder, src[0], to_float_type(ctx, def_type), ""); + result = LLVMBuildSIToFP(ctx->ac.builder, src[0], to_float_type(&ctx->ac, def_type), ""); 
break; case nir_op_u2f32: case nir_op_u2f64: - result = LLVMBuildUIToFP(ctx->builder, src[0], to_float_type(ctx, def_type), ""); + result = LLVMBuildUIToFP(ctx->ac.builder, src[0], to_float_type(&ctx->ac, def_type), ""); break; case nir_op_f2f64: - result = LLVMBuildFPExt(ctx->builder, src[0], to_float_type(ctx, def_type), ""); + result = LLVMBuildFPExt(ctx->ac.builder, src[0], to_float_type(&ctx->ac, def_type), ""); break; case nir_op_f2f32: - result = LLVMBuildFPTrunc(ctx->builder, src[0], to_float_type(ctx, def_type), ""); + result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], to_float_type(&ctx->ac, def_type), ""); break; case nir_op_u2u32: case nir_op_u2u64: - if (get_elem_bits(ctx, LLVMTypeOf(src[0])) < get_elem_bits(ctx, def_type)) - result = LLVMBuildZExt(ctx->builder, src[0], def_type, ""); + if (get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->ac, def_type)) + result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, ""); else - result = LLVMBuildTrunc(ctx->builder, src[0], def_type, ""); + result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, ""); break; case nir_op_i2i32: case nir_op_i2i64: - if (get_elem_bits(ctx, LLVMTypeOf(src[0])) < get_elem_bits(ctx, def_type)) - result = LLVMBuildSExt(ctx->builder, src[0], def_type, ""); + if (get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->ac, def_type)) + result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, ""); else - result = LLVMBuildTrunc(ctx->builder, src[0], def_type, ""); + result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, ""); break; case nir_op_bcsel: - result = emit_bcsel(ctx, src[0], src[1], src[2]); + result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]); break; case nir_op_find_lsb: - result = emit_find_lsb(ctx, src[0]); + result = emit_find_lsb(&ctx->ac, src[0]); break; case nir_op_ufind_msb: - result = emit_ufind_msb(ctx, src[0]); + result = emit_ufind_msb(&ctx->ac, src[0]); break; case nir_op_ifind_msb: - result = emit_ifind_msb(ctx, src[0]); + 
result = emit_ifind_msb(&ctx->ac, src[0]); break; case nir_op_uadd_carry: - result = emit_uint_carry(ctx, "llvm.uadd.with.overflow.i32", src[0], src[1]); + result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]); break; case nir_op_usub_borrow: - result = emit_uint_carry(ctx, "llvm.usub.with.overflow.i32", src[0], src[1]); + result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]); break; case nir_op_b2f: - result = emit_b2f(ctx, src[0]); + result = emit_b2f(&ctx->ac, src[0]); + break; + case nir_op_f2b: + result = emit_f2b(&ctx->ac, src[0]); + break; + case nir_op_b2i: + result = emit_b2i(&ctx->ac, src[0]); + break; + case nir_op_i2b: + result = emit_i2b(&ctx->ac, src[0]); break; case nir_op_fquantize2f16: - src[0] = to_float(ctx, src[0]); - result = LLVMBuildFPTrunc(ctx->builder, src[0], ctx->f16, ""); - /* need to convert back up to f32 */ - result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, ""); + result = emit_f2f16(ctx->nctx, src[0]); break; case nir_op_umul_high: - result = emit_umul_high(ctx, src[0], src[1]); + result = emit_umul_high(&ctx->ac, src[0], src[1]); break; case nir_op_imul_high: - result = emit_imul_high(ctx, src[0], src[1]); + result = emit_imul_high(&ctx->ac, src[0], src[1]); break; case nir_op_pack_half_2x16: - result = emit_pack_half_2x16(ctx, src[0]); + result = emit_pack_half_2x16(&ctx->ac, src[0]); break; case nir_op_unpack_half_2x16: - result = emit_unpack_half_2x16(ctx, src[0]); + result = emit_unpack_half_2x16(&ctx->ac, src[0]); break; case nir_op_fddx: case nir_op_fddy: @@ -1532,8 +1874,39 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr) case nir_op_fddy_fine: case nir_op_fddx_coarse: case nir_op_fddy_coarse: - result = emit_ddxy(ctx, instr->op, src[0]); + result = emit_ddxy(ctx->nctx, instr->op, src[0]); + break; + + case nir_op_unpack_64_2x32_split_x: { + assert(instr->src[0].src.ssa->num_components == 1); + LLVMValueRef tmp = 
LLVMBuildBitCast(ctx->ac.builder, src[0], + LLVMVectorType(ctx->ac.i32, 2), + ""); + result = LLVMBuildExtractElement(ctx->ac.builder, tmp, + ctx->ac.i32_0, ""); + break; + } + + case nir_op_unpack_64_2x32_split_y: { + assert(instr->src[0].src.ssa->num_components == 1); + LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], + LLVMVectorType(ctx->ac.i32, 2), + ""); + result = LLVMBuildExtractElement(ctx->ac.builder, tmp, + ctx->ac.i32_1, ""); + break; + } + + case nir_op_pack_64_2x32_split: { + LLVMValueRef tmp = LLVMGetUndef(LLVMVectorType(ctx->ac.i32, 2)); + tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp, + src[0], ctx->ac.i32_0, ""); + tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp, + src[1], ctx->ac.i32_1, ""); + result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, ""); break; + } + + default: fprintf(stderr, "Unknown NIR alu instr: "); nir_print_instr(&instr->instr, stderr); @@ -1543,18 +1916,18 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr) if (result) { assert(instr->dest.dest.is_ssa); - result = to_integer(ctx, result); + result = to_integer(&ctx->ac, result); _mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa, result); } } -static void visit_load_const(struct nir_to_llvm_context *ctx, - nir_load_const_instr *instr) +static void visit_load_const(struct ac_nir_context *ctx, + const nir_load_const_instr *instr) { LLVMValueRef values[4], value = NULL; LLVMTypeRef element_type = - LLVMIntTypeInContext(ctx->context, instr->def.bit_size); + LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size); for (unsigned i = 0; i < instr->def.num_components; ++i) { switch (instr->def.bit_size) { @@ -1590,27 +1963,27 @@ static LLVMValueRef cast_ptr(struct nir_to_llvm_context *ctx, LLVMValueRef ptr, } static LLVMValueRef -get_buffer_size(struct nir_to_llvm_context *ctx, LLVMValueRef descriptor, bool in_elements) +get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor, bool in_elements) { LLVMValueRef
size = - LLVMBuildExtractElement(ctx->builder, descriptor, - LLVMConstInt(ctx->i32, 2, false), ""); + LLVMBuildExtractElement(ctx->ac.builder, descriptor, + LLVMConstInt(ctx->ac.i32, 2, false), ""); /* VI only */ - if (ctx->options->chip_class >= VI && in_elements) { + if (ctx->abi->chip_class >= VI && in_elements) { /* On VI, the descriptor contains the size in bytes, * but TXQ must return the size in elements. * The stride is always non-zero for resources using TXQ. */ LLVMValueRef stride = - LLVMBuildExtractElement(ctx->builder, descriptor, - LLVMConstInt(ctx->i32, 1, false), ""); - stride = LLVMBuildLShr(ctx->builder, stride, - LLVMConstInt(ctx->i32, 16, false), ""); - stride = LLVMBuildAnd(ctx->builder, stride, - LLVMConstInt(ctx->i32, 0x3fff, false), ""); + LLVMBuildExtractElement(ctx->ac.builder, descriptor, + LLVMConstInt(ctx->ac.i32, 1, false), ""); + stride = LLVMBuildLShr(ctx->ac.builder, stride, + LLVMConstInt(ctx->ac.i32, 16, false), ""); + stride = LLVMBuildAnd(ctx->ac.builder, stride, + LLVMConstInt(ctx->ac.i32, 0x3fff, false), ""); - size = LLVMBuildUDiv(ctx->builder, size, stride, ""); + size = LLVMBuildUDiv(ctx->ac.builder, size, stride, ""); } return size; } @@ -1632,9 +2005,9 @@ static void build_int_type_name( strcpy(buf, "i32"); } -static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx, +static LLVMValueRef radv_lower_gather4_integer(struct ac_llvm_context *ctx, struct ac_image_args *args, - nir_tex_instr *instr) + const nir_tex_instr *instr) { enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type); LLVMValueRef coord = args->addr; @@ -1651,15 +2024,15 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx, txq_args.da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE; txq_args.opcode = ac_image_get_resinfo; txq_args.dmask = 0xf; - txq_args.addr = ctx->i32zero; + txq_args.addr = ctx->i32_0; txq_args.resource = args->resource; - LLVMValueRef size = 
ac_build_image_opcode(&ctx->ac, &txq_args); + LLVMValueRef size = ac_build_image_opcode(ctx, &txq_args); for (c = 0; c < 2; c++) { half_texel[c] = LLVMBuildExtractElement(ctx->builder, size, LLVMConstInt(ctx->i32, c, false), ""); half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, ""); - half_texel[c] = ac_build_fdiv(&ctx->ac, ctx->f32one, half_texel[c]); + half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]); half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c], LLVMConstReal(ctx->f32, -0.5), ""); } @@ -1695,11 +2068,11 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx, /* workaround 8/8/8/8 uint/sint cube gather bug */ /* first detect it then change to a scaled read and f2i */ - tmp = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32one, ""); + tmp = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, ""); tmp2 = tmp; /* extract the DATA_FORMAT */ - tmp = ac_build_bfe(&ctx->ac, tmp, LLVMConstInt(ctx->i32, 20, false), + tmp = ac_build_bfe(ctx, tmp, LLVMConstInt(ctx->i32, 20, false), LLVMConstInt(ctx->i32, 6, false), false); /* is the DATA_FORMAT == 8_8_8_8 */ @@ -1715,16 +2088,16 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx, LLVMConstInt(ctx->i32, 0x14000000, false), ""); /* replace the NUM FORMAT in the descriptor */ - tmp2 = LLVMBuildAnd(ctx->builder, tmp2, LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT, false), ""); + tmp2 = LLVMBuildAnd(ctx->builder, tmp2, LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false), ""); tmp2 = LLVMBuildOr(ctx->builder, tmp2, tmp, ""); - args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32one, ""); + args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32_1, ""); /* don't modify the coordinates for this case */ coord = LLVMBuildSelect(ctx->builder, compare_cube_wa, orig_coords, coord, ""); } args->addr = coord; - result = 
ac_build_image_opcode(&ctx->ac, args); + result = ac_build_image_opcode(ctx, args); if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { LLVMValueRef tmp, tmp2; @@ -1746,15 +2119,16 @@ static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx, return result; } -static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx, - nir_tex_instr *instr, +static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, + const nir_tex_instr *instr, + bool lod_is_zero, struct ac_image_args *args) { if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { return ac_build_buffer_load_format(&ctx->ac, args->resource, args->addr, - LLVMConstInt(ctx->i32, 0, false), + LLVMConstInt(ctx->ac.i32, 0, false), true); } @@ -1773,7 +2147,10 @@ static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx, args->bias = true; break; case nir_texop_txl: - args->lod = true; + if (lod_is_zero) + args->level_zero = true; + else + args->lod = true; break; case nir_texop_txs: case nir_texop_query_levels: @@ -1802,7 +2179,7 @@ static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx, if (instr->op == nir_texop_tg4) { enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type); if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) { - return radv_lower_gather4_integer(ctx, args, instr); + return radv_lower_gather4_integer(&ctx->ac, args, instr); } } return ac_build_image_opcode(&ctx->ac, args); @@ -1811,7 +2188,7 @@ static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx, static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { - LLVMValueRef index = get_src(ctx, instr->src[0]); + LLVMValueRef index = get_src(ctx->nir, instr->src[0]); unsigned desc_set = nir_intrinsic_desc_set(instr); unsigned binding = nir_intrinsic_binding(instr); LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set]; @@ -1847,16 +2224,16 @@ static LLVMValueRef 
visit_load_push_constant(struct nir_to_llvm_context *ctx, LLVMValueRef ptr, addr; addr = LLVMConstInt(ctx->i32, nir_intrinsic_base(instr), 0); - addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx, instr->src[0]), ""); + addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx->nir, instr->src[0]), ""); ptr = ac_build_gep0(&ctx->ac, ctx->push_constants, addr); - ptr = cast_ptr(ctx, ptr, get_def_type(ctx, &instr->dest.ssa)); + ptr = cast_ptr(ctx, ptr, get_def_type(ctx->nir, &instr->dest.ssa)); return LLVMBuildLoad(ctx->builder, ptr, ""); } -static LLVMValueRef visit_get_buffer_size(struct nir_to_llvm_context *ctx, - nir_intrinsic_instr *instr) +static LLVMValueRef visit_get_buffer_size(struct ac_nir_context *ctx, + const nir_intrinsic_instr *instr) { LLVMValueRef desc = get_src(ctx, instr->src[0]); @@ -1866,9 +2243,9 @@ static void visit_store_ssbo(struct nir_to_llvm_context *ctx, nir_intrinsic_instr *instr) { const char *store_name; - LLVMValueRef src_data = get_src(ctx, instr->src[0]); + LLVMValueRef src_data = get_src(ctx->nir, instr->src[0]); LLVMTypeRef data_type = ctx->f32; - int elem_size_mult = get_elem_bits(ctx, LLVMTypeOf(src_data)) / 32; + int elem_size_mult = get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 32; int components_32bit = elem_size_mult * instr->num_components; unsigned writemask = nir_intrinsic_write_mask(instr); LLVMValueRef base_data, base_offset; @@ -1877,7 +2254,7 @@ static void visit_store_ssbo(struct nir_to_llvm_context *ctx, if (ctx->stage == MESA_SHADER_FRAGMENT) ctx->shader_info->fs.writes_memory = true; - params[1] = get_src(ctx, instr->src[1]); + params[1] = get_src(ctx->nir, instr->src[1]); params[2] = LLVMConstInt(ctx->i32, 0, false); /* vindex */ params[4] = ctx->i1false; /* glc */ params[5] = ctx->i1false; /* slc */ @@ -1885,11 +2262,11 @@ static void visit_store_ssbo(struct nir_to_llvm_context *ctx, if (components_32bit > 1) data_type = LLVMVectorType(ctx->f32, components_32bit); - base_data = to_float(ctx, src_data); - base_data = 
trim_vector(ctx, base_data, instr->num_components); + base_data = to_float(&ctx->ac, src_data); + base_data = trim_vector(&ctx->ac, base_data, instr->num_components); base_data = LLVMBuildBitCast(ctx->builder, base_data, data_type, ""); - base_offset = get_src(ctx, instr->src[2]); /* voffset */ + base_offset = get_src(ctx->nir, instr->src[2]); /* voffset */ while (writemask) { int start, count; LLVMValueRef data; @@ -1949,7 +2326,7 @@ static void visit_store_ssbo(struct nir_to_llvm_context *ctx, } static LLVMValueRef visit_atomic_ssbo(struct nir_to_llvm_context *ctx, - nir_intrinsic_instr *instr) + const nir_intrinsic_instr *instr) { const char *name; LLVMValueRef params[6]; @@ -1958,12 +2335,12 @@ static LLVMValueRef visit_atomic_ssbo(struct nir_to_llvm_context *ctx, ctx->shader_info->fs.writes_memory = true; if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) { - params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[3]), 0); + params[arg_count++] = llvm_extract_elem(&ctx->ac, get_src(ctx->nir, instr->src[3]), 0); } - params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[2]), 0); - params[arg_count++] = get_src(ctx, instr->src[0]); + params[arg_count++] = llvm_extract_elem(&ctx->ac, get_src(ctx->nir, instr->src[2]), 0); + params[arg_count++] = get_src(ctx->nir, instr->src[0]); params[arg_count++] = LLVMConstInt(ctx->i32, 0, false); /* vindex */ - params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */ + params[arg_count++] = get_src(ctx->nir, instr->src[1]); /* voffset */ params[arg_count++] = ctx->i1false; /* slc */ switch (instr->intrinsic) { @@ -2005,7 +2382,7 @@ static LLVMValueRef visit_atomic_ssbo(struct nir_to_llvm_context *ctx, } static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx, - nir_intrinsic_instr *instr) + const nir_intrinsic_instr *instr) { LLVMValueRef results[2]; int load_components; @@ -2018,7 +2395,7 @@ static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx, const 
char *load_name; LLVMTypeRef data_type = ctx->f32; LLVMValueRef offset = LLVMConstInt(ctx->i32, i * 4, false); - offset = LLVMBuildAdd(ctx->builder, get_src(ctx, instr->src[1]), offset, ""); + offset = LLVMBuildAdd(ctx->builder, get_src(ctx->nir, instr->src[1]), offset, ""); if (load_components == 3) data_type = LLVMVectorType(ctx->f32, 4); @@ -2035,7 +2412,7 @@ static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx, unreachable("unhandled number of components"); LLVMValueRef params[] = { - get_src(ctx, instr->src[0]), + get_src(ctx->nir, instr->src[0]), LLVMConstInt(ctx->i32, 0, false), offset, ctx->i1false, @@ -2061,18 +2438,19 @@ static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx, } return LLVMBuildBitCast(ctx->builder, ret, - get_def_type(ctx, &instr->dest.ssa), ""); + get_def_type(ctx->nir, &instr->dest.ssa), ""); } -static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx, - nir_intrinsic_instr *instr) +static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, + const nir_intrinsic_instr *instr) { LLVMValueRef results[8], ret; LLVMValueRef rsrc = get_src(ctx, instr->src[0]); LLVMValueRef offset = get_src(ctx, instr->src[1]); int num_components = instr->num_components; - rsrc = LLVMBuildBitCast(ctx->builder, rsrc, LLVMVectorType(ctx->i8, 16), ""); + if (ctx->abi->load_ubo) + rsrc = ctx->abi->load_ubo(ctx->abi, rsrc); if (instr->dest.ssa.bit_size == 64) num_components *= 2; @@ -2080,10 +2458,10 @@ static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx, for (unsigned i = 0; i < num_components; ++i) { LLVMValueRef params[] = { rsrc, - LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0), + LLVMBuildAdd(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, 4 * i, 0), offset, "") }; - results[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.load.const", ctx->f32, + results[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.load.const.v4i32", ctx->ac.f32, params, 2, AC_FUNC_ATTR_READNONE | 
AC_FUNC_ATTR_LEGACY); @@ -2091,22 +2469,44 @@ static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx, ret = ac_build_gather_values(&ctx->ac, results, instr->num_components); - return LLVMBuildBitCast(ctx->builder, ret, + return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), ""); } static void -radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail, - bool vs_in, unsigned *vertex_index_out, - unsigned *const_out, LLVMValueRef *indir_out) +get_deref_offset(struct ac_nir_context *ctx, nir_deref_var *deref, + bool vs_in, unsigned *vertex_index_out, + LLVMValueRef *vertex_index_ref, + unsigned *const_out, LLVMValueRef *indir_out) { unsigned const_offset = 0; + nir_deref *tail = &deref->deref; LLVMValueRef offset = NULL; - if (vertex_index_out != NULL) { + if (vertex_index_out != NULL || vertex_index_ref != NULL) { tail = tail->child; nir_deref_array *deref_array = nir_deref_as_array(tail); - *vertex_index_out = deref_array->base_offset; + if (vertex_index_out) + *vertex_index_out = deref_array->base_offset; + + if (vertex_index_ref) { + LLVMValueRef vtx = LLVMConstInt(ctx->ac.i32, deref_array->base_offset, false); + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + vtx = LLVMBuildAdd(ctx->ac.builder, vtx, get_src(ctx, deref_array->indirect), ""); + } + *vertex_index_ref = vtx; + } + } + + if (deref->var->data.compact) { + assert(tail->child->deref_type == nir_deref_type_array); + assert(glsl_type_is_scalar(glsl_without_array(deref->var->type))); + nir_deref_array *deref_array = nir_deref_as_array(tail->child); + /* We always lower indirect dereferences for "compact" array vars. 
*/ + assert(deref_array->deref_array_type == nir_deref_array_type_direct); + + const_offset = deref_array->base_offset; + goto out; } while (tail->child != NULL) { @@ -2124,11 +2524,11 @@ radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail, assert(deref_array->deref_array_type == nir_deref_array_type_indirect); index = get_src(ctx, deref_array->indirect); - stride = LLVMConstInt(ctx->i32, size, 0); - local_offset = LLVMBuildMul(ctx->builder, stride, index, ""); + stride = LLVMConstInt(ctx->ac.i32, size, 0); + local_offset = LLVMBuildMul(ctx->ac.builder, stride, index, ""); if (offset) - offset = LLVMBuildAdd(ctx->builder, offset, local_offset, ""); + offset = LLVMBuildAdd(ctx->ac.builder, offset, local_offset, ""); else offset = local_offset; } else if (tail->deref_type == nir_deref_type_struct) { @@ -2142,10 +2542,10 @@ radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail, unreachable("unsupported deref type"); } - +out: if (const_offset && offset) - offset = LLVMBuildAdd(ctx->builder, offset, - LLVMConstInt(ctx->i32, const_offset, 0), + offset = LLVMBuildAdd(ctx->ac.builder, offset, + LLVMConstInt(ctx->ac.i32, const_offset, 0), ""); *const_out = const_offset; @@ -2153,32 +2553,345 @@ radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail, } static LLVMValueRef -load_gs_input(struct nir_to_llvm_context *ctx, - nir_intrinsic_instr *instr) +lds_load(struct nir_to_llvm_context *ctx, + LLVMValueRef dw_addr) { - LLVMValueRef indir_index, vtx_offset; - unsigned const_index; - LLVMValueRef args[9]; - unsigned param, vtx_offset_param; - LLVMValueRef value[4], result; - unsigned vertex_index; - unsigned cull_offset = 0; - radv_get_deref_offset(ctx, &instr->variables[0]->deref, - false, &vertex_index, - &const_index, &indir_index); - vtx_offset_param = vertex_index; - assert(vtx_offset_param < 6); - vtx_offset = LLVMBuildMul(ctx->builder, ctx->gs_vtx_offset[vtx_offset_param], - LLVMConstInt(ctx->i32, 4, false), ""); - + 
LLVMValueRef value; + value = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false); + return value; +} + +static void +lds_store(struct nir_to_llvm_context *ctx, + LLVMValueRef dw_addr, LLVMValueRef value) +{ + value = LLVMBuildBitCast(ctx->builder, value, ctx->i32, ""); + ac_build_indexed_store(&ctx->ac, ctx->lds, + dw_addr, value); +} + +/* The offchip buffer layout for TCS->TES is + * + * - attribute 0 of patch 0 vertex 0 + * - attribute 0 of patch 0 vertex 1 + * - attribute 0 of patch 0 vertex 2 + * ... + * - attribute 0 of patch 1 vertex 0 + * - attribute 0 of patch 1 vertex 1 + * ... + * - attribute 1 of patch 0 vertex 0 + * - attribute 1 of patch 0 vertex 1 + * ... + * - per patch attribute 0 of patch 0 + * - per patch attribute 0 of patch 1 + * ... + * + * Note that every attribute has 4 components. + */ +static LLVMValueRef get_tcs_tes_buffer_address(struct nir_to_llvm_context *ctx, + LLVMValueRef vertex_index, + LLVMValueRef param_index) +{ + LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices; + LLVMValueRef param_stride, constant16; + LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); + + vertices_per_patch = unpack_param(ctx, ctx->tcs_offchip_layout, 9, 6); + num_patches = unpack_param(ctx, ctx->tcs_offchip_layout, 0, 9); + total_vertices = LLVMBuildMul(ctx->builder, vertices_per_patch, + num_patches, ""); + + constant16 = LLVMConstInt(ctx->i32, 16, false); + if (vertex_index) { + base_addr = LLVMBuildMul(ctx->builder, rel_patch_id, + vertices_per_patch, ""); + + base_addr = LLVMBuildAdd(ctx->builder, base_addr, + vertex_index, ""); + + param_stride = total_vertices; + } else { + base_addr = rel_patch_id; + param_stride = num_patches; + } + + base_addr = LLVMBuildAdd(ctx->builder, base_addr, + LLVMBuildMul(ctx->builder, param_index, + param_stride, ""), ""); + + base_addr = LLVMBuildMul(ctx->builder, base_addr, constant16, ""); + + if (!vertex_index) { + LLVMValueRef patch_data_offset = + unpack_param(ctx, 
ctx->tcs_offchip_layout, 16, 16); + + base_addr = LLVMBuildAdd(ctx->builder, base_addr, + patch_data_offset, ""); + } + return base_addr; +} + +static LLVMValueRef get_tcs_tes_buffer_address_params(struct nir_to_llvm_context *ctx, + unsigned param, + unsigned const_index, + bool is_compact, + LLVMValueRef vertex_index, + LLVMValueRef indir_index) +{ + LLVMValueRef param_index; + + if (indir_index) + param_index = LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, param, false), + indir_index, ""); + else { + if (const_index && !is_compact) + param += const_index; + param_index = LLVMConstInt(ctx->i32, param, false); + } + return get_tcs_tes_buffer_address(ctx, vertex_index, param_index); +} + +static void +mark_tess_output(struct nir_to_llvm_context *ctx, + bool is_patch, uint32_t param) + +{ + if (is_patch) { + ctx->tess_patch_outputs_written |= (1ull << param); + } else + ctx->tess_outputs_written |= (1ull << param); +} + +static LLVMValueRef +get_dw_address(struct nir_to_llvm_context *ctx, + LLVMValueRef dw_addr, + unsigned param, + unsigned const_index, + bool compact_const_index, + LLVMValueRef vertex_index, + LLVMValueRef stride, + LLVMValueRef indir_index) + +{ + + if (vertex_index) { + dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, + LLVMBuildMul(ctx->builder, + vertex_index, + stride, ""), ""); + } + + if (indir_index) + dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, + LLVMBuildMul(ctx->builder, indir_index, + LLVMConstInt(ctx->i32, 4, false), ""), ""); + else if (const_index && !compact_const_index) + dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, + LLVMConstInt(ctx->i32, const_index, false), ""); + + dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, + LLVMConstInt(ctx->i32, param * 4, false), ""); + + if (const_index && compact_const_index) + dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, + LLVMConstInt(ctx->i32, const_index, false), ""); + return dw_addr; +} + +static LLVMValueRef +load_tcs_input(struct nir_to_llvm_context *ctx, + nir_intrinsic_instr *instr) +{ 
+ LLVMValueRef dw_addr, stride; + unsigned const_index; + LLVMValueRef vertex_index; + LLVMValueRef indir_index; + unsigned param; + LLVMValueRef value[4], result; + const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage); + const bool is_compact = instr->variables[0]->var->data.compact; + param = shader_io_get_unique_index(instr->variables[0]->var->data.location); + get_deref_offset(ctx->nir, instr->variables[0], + false, NULL, per_vertex ? &vertex_index : NULL, + &const_index, &indir_index); + + stride = unpack_param(ctx, ctx->tcs_in_layout, 13, 8); + dw_addr = get_tcs_in_current_patch_offset(ctx); + dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride, + indir_index); + + for (unsigned i = 0; i < instr->num_components; i++) { + value[i] = lds_load(ctx, dw_addr); + dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, + ctx->i32one, ""); + } + result = ac_build_gather_values(&ctx->ac, value, instr->num_components); + result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), ""); + return result; +} + +static LLVMValueRef +load_tcs_output(struct nir_to_llvm_context *ctx, + nir_intrinsic_instr *instr) +{ + LLVMValueRef dw_addr, stride; + LLVMValueRef value[4], result; + LLVMValueRef vertex_index = NULL; + LLVMValueRef indir_index = NULL; + unsigned const_index = 0; + unsigned param; + const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage); + const bool is_compact = instr->variables[0]->var->data.compact; + param = shader_io_get_unique_index(instr->variables[0]->var->data.location); + get_deref_offset(ctx->nir, instr->variables[0], + false, NULL, per_vertex ? 
&vertex_index : NULL, + &const_index, &indir_index); + + if (!instr->variables[0]->var->data.patch) { + stride = unpack_param(ctx, ctx->tcs_out_layout, 13, 8); + dw_addr = get_tcs_out_current_patch_offset(ctx); + } else { + dw_addr = get_tcs_out_current_patch_data_offset(ctx); + } + + dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride, + indir_index); + + for (unsigned i = 0; i < instr->num_components; i++) { + value[i] = lds_load(ctx, dw_addr); + dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, + ctx->i32one, ""); + } + result = ac_build_gather_values(&ctx->ac, value, instr->num_components); + result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), ""); + return result; +} + +static void +store_tcs_output(struct nir_to_llvm_context *ctx, + nir_intrinsic_instr *instr, + LLVMValueRef src, + unsigned writemask) +{ + LLVMValueRef stride, dw_addr; + LLVMValueRef buf_addr = NULL; + LLVMValueRef vertex_index = NULL; + LLVMValueRef indir_index = NULL; + unsigned const_index = 0; + unsigned param; + const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage); + const bool is_compact = instr->variables[0]->var->data.compact; + + get_deref_offset(ctx->nir, instr->variables[0], + false, NULL, per_vertex ? 
&vertex_index : NULL, + &const_index, &indir_index); + + param = shader_io_get_unique_index(instr->variables[0]->var->data.location); + if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 && + is_compact && const_index > 3) { + const_index -= 3; + param++; + } + + if (!instr->variables[0]->var->data.patch) { + stride = unpack_param(ctx, ctx->tcs_out_layout, 13, 8); + dw_addr = get_tcs_out_current_patch_offset(ctx); + } else { + dw_addr = get_tcs_out_current_patch_data_offset(ctx); + } + + mark_tess_output(ctx, instr->variables[0]->var->data.patch, param); + + dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride, + indir_index); + buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, is_compact, + vertex_index, indir_index); + + unsigned base = is_compact ? const_index : 0; + for (unsigned chan = 0; chan < 8; chan++) { + bool is_tess_factor = false; + if (!(writemask & (1 << chan))) + continue; + LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan); + + lds_store(ctx, dw_addr, value); + + if (instr->variables[0]->var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || + instr->variables[0]->var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER) + is_tess_factor = true; + + if (!is_tess_factor && writemask != 0xF) + ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1, + buf_addr, ctx->oc_lds, + 4 * (base + chan), 1, 0, true, false); + + dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, + ctx->i32one, ""); + } + + if (writemask == 0xF) { + ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, src, 4, + buf_addr, ctx->oc_lds, + (base * 4), 1, 0, true, false); + } +} + +static LLVMValueRef +load_tes_input(struct nir_to_llvm_context *ctx, + const nir_intrinsic_instr *instr) +{ + LLVMValueRef buf_addr; + LLVMValueRef result; + LLVMValueRef vertex_index = NULL; + LLVMValueRef indir_index = NULL; + unsigned const_index = 0; + unsigned param; + const bool per_vertex = 
nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage); + const bool is_compact = instr->variables[0]->var->data.compact; + + get_deref_offset(ctx->nir, instr->variables[0], + false, NULL, per_vertex ? &vertex_index : NULL, + &const_index, &indir_index); + param = shader_io_get_unique_index(instr->variables[0]->var->data.location); + if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 && + is_compact && const_index > 3) { + const_index -= 3; + param++; + } + buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, + is_compact, vertex_index, indir_index); + + result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, instr->num_components, NULL, + buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false); + result = trim_vector(&ctx->ac, result, instr->num_components); + result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), ""); + return result; +} + +static LLVMValueRef +load_gs_input(struct nir_to_llvm_context *ctx, + nir_intrinsic_instr *instr) +{ + LLVMValueRef indir_index, vtx_offset; + unsigned const_index; + LLVMValueRef args[9]; + unsigned param, vtx_offset_param; + LLVMValueRef value[4], result; + unsigned vertex_index; + get_deref_offset(ctx->nir, instr->variables[0], + false, &vertex_index, NULL, + &const_index, &indir_index); + vtx_offset_param = vertex_index; + assert(vtx_offset_param < 6); + vtx_offset = LLVMBuildMul(ctx->builder, ctx->gs_vtx_offset[vtx_offset_param], + LLVMConstInt(ctx->i32, 4, false), ""); + param = shader_io_get_unique_index(instr->variables[0]->var->data.location); - if (instr->variables[0]->var->data.location == VARYING_SLOT_CULL_DIST0) - cull_offset += ctx->num_input_clips; for (unsigned i = 0; i < instr->num_components; i++) { args[0] = ctx->esgs_ring; args[1] = vtx_offset; - args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index + cull_offset) * 256, false); + args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index) 
* 256, false); args[3] = ctx->i32zero; args[4] = ctx->i32one; /* OFFEN */ args[5] = ctx->i32zero; /* IDXEN */ @@ -2196,7 +2909,46 @@ load_gs_input(struct nir_to_llvm_context *ctx, return result; } -static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, +static LLVMValueRef +build_gep_for_deref(struct ac_nir_context *ctx, + nir_deref_var *deref) +{ + struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, deref->var); + assert(entry->data); + LLVMValueRef val = entry->data; + nir_deref *tail = deref->deref.child; + while (tail != NULL) { + LLVMValueRef offset; + switch (tail->deref_type) { + case nir_deref_type_array: { + nir_deref_array *array = nir_deref_as_array(tail); + offset = LLVMConstInt(ctx->ac.i32, array->base_offset, 0); + if (array->deref_array_type == + nir_deref_array_type_indirect) { + offset = LLVMBuildAdd(ctx->ac.builder, offset, + get_src(ctx, + array->indirect), + ""); + } + break; + } + case nir_deref_type_struct: { + nir_deref_struct *deref_struct = + nir_deref_as_struct(tail); + offset = LLVMConstInt(ctx->ac.i32, + deref_struct->index, 0); + break; + } + default: + unreachable("bad deref type"); + } + val = ac_build_gep0(&ctx->ac, val, offset); + tail = tail->child; + } + return val; +} + +static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { LLVMValueRef values[8]; @@ -2207,7 +2959,7 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, unsigned const_index; bool vs_in = ctx->stage == MESA_SHADER_VERTEX && instr->variables[0]->var->data.mode == nir_var_shader_in; - radv_get_deref_offset(ctx, &instr->variables[0]->deref, vs_in, NULL, + get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL, &const_index, &indir_index); if (instr->dest.ssa.bit_size == 64) @@ -2215,8 +2967,12 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, switch (instr->variables[0]->var->data.mode) { case nir_var_shader_in: + if (ctx->stage == MESA_SHADER_TESS_CTRL) + return 
load_tcs_input(ctx->nctx, instr); + if (ctx->stage == MESA_SHADER_TESS_EVAL) + return load_tes_input(ctx->nctx, instr); if (ctx->stage == MESA_SHADER_GEOMETRY) { - return load_gs_input(ctx, instr); + return load_gs_input(ctx->nctx, instr); } for (unsigned chan = 0; chan < ve; chan++) { if (indir_index) { @@ -2225,14 +2981,14 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, ctx->stage == MESA_SHADER_VERTEX); count -= chan / 4; LLVMValueRef tmp_vec = ac_build_gather_values_extended( - &ctx->ac, ctx->inputs + idx + chan, count, + &ctx->ac, ctx->abi->inputs + idx + chan, count, 4, false); - values[chan] = LLVMBuildExtractElement(ctx->builder, + values[chan] = LLVMBuildExtractElement(ctx->ac.builder, tmp_vec, indir_index, ""); } else - values[chan] = ctx->inputs[idx + chan + const_index * 4]; + values[chan] = ctx->abi->inputs[idx + chan + const_index * 4]; } break; case nir_var_local: @@ -2245,15 +3001,25 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, &ctx->ac, ctx->locals + idx + chan, count, 4, true); - values[chan] = LLVMBuildExtractElement(ctx->builder, + values[chan] = LLVMBuildExtractElement(ctx->ac.builder, tmp_vec, indir_index, ""); } else { - values[chan] = LLVMBuildLoad(ctx->builder, ctx->locals[idx + chan + const_index * 4], ""); + values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * 4], ""); } } break; + case nir_var_shared: { + LLVMValueRef address = build_gep_for_deref(ctx, + instr->variables[0]); + LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, ""); + return LLVMBuildBitCast(ctx->ac.builder, val, + get_def_type(ctx, &instr->dest.ssa), + ""); + } case nir_var_shader_out: + if (ctx->stage == MESA_SHADER_TESS_CTRL) + return load_tcs_output(ctx->nctx, instr); for (unsigned chan = 0; chan < ve; chan++) { if (indir_index) { unsigned count = glsl_count_attribute_slots( @@ -2263,58 +3029,41 @@ static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx, &ctx->ac, 
ctx->outputs + idx + chan, count, 4, true); - values[chan] = LLVMBuildExtractElement(ctx->builder, + values[chan] = LLVMBuildExtractElement(ctx->ac.builder, tmp_vec, indir_index, ""); } else { - values[chan] = LLVMBuildLoad(ctx->builder, + values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->outputs[idx + chan + const_index * 4], ""); } } break; - case nir_var_shared: { - LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32); - LLVMValueRef derived_ptr; - - if (indir_index) - indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), ""); - - for (unsigned chan = 0; chan < ve; chan++) { - LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false); - if (indir_index) - index = LLVMBuildAdd(ctx->builder, index, indir_index, ""); - derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, ""); - - values[chan] = LLVMBuildLoad(ctx->builder, derived_ptr, ""); - } - break; - } default: unreachable("unhandle variable mode"); } ret = ac_build_gather_values(&ctx->ac, values, ve); - return LLVMBuildBitCast(ctx->builder, ret, get_def_type(ctx, &instr->dest.ssa), ""); + return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), ""); } static void -visit_store_var(struct nir_to_llvm_context *ctx, - nir_intrinsic_instr *instr) +visit_store_var(struct ac_nir_context *ctx, + nir_intrinsic_instr *instr) { LLVMValueRef temp_ptr, value; int idx = instr->variables[0]->var->data.driver_location; - LLVMValueRef src = to_float(ctx, get_src(ctx, instr->src[0])); + LLVMValueRef src = to_float(&ctx->ac, get_src(ctx, instr->src[0])); int writemask = instr->const_index[0]; LLVMValueRef indir_index; unsigned const_index; - radv_get_deref_offset(ctx, &instr->variables[0]->deref, false, - NULL, &const_index, &indir_index); + get_deref_offset(ctx, instr->variables[0], false, + NULL, NULL, &const_index, &indir_index); - if (get_elem_bits(ctx, LLVMTypeOf(src)) == 64) { + if (get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) { int old_writemask = 
writemask; - src = LLVMBuildBitCast(ctx->builder, src, - LLVMVectorType(ctx->f32, get_llvm_num_components(src) * 2), + src = LLVMBuildBitCast(ctx->ac.builder, src, + LLVMVectorType(ctx->ac.f32, get_llvm_num_components(src) * 2), ""); writemask = 0; @@ -2326,15 +3075,20 @@ visit_store_var(struct nir_to_llvm_context *ctx, switch (instr->variables[0]->var->data.mode) { case nir_var_shader_out: + + if (ctx->stage == MESA_SHADER_TESS_CTRL) { + store_tcs_output(ctx->nctx, instr, src, writemask); + return; + } + for (unsigned chan = 0; chan < 8; chan++) { int stride = 4; if (!(writemask & (1 << chan))) continue; - value = llvm_extract_elem(ctx, src, chan); + value = llvm_extract_elem(&ctx->ac, src, chan); - if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 || - instr->variables[0]->var->data.location == VARYING_SLOT_CULL_DIST0) + if (instr->variables[0]->var->data.compact) stride = 1; if (indir_index) { unsigned count = glsl_count_attribute_slots( @@ -2345,17 +3099,17 @@ visit_store_var(struct nir_to_llvm_context *ctx, stride, true); if (get_llvm_num_components(tmp_vec) > 1) { - tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec, + tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec, value, indir_index, ""); } else tmp_vec = value; - build_store_values_extended(ctx, ctx->outputs + idx + chan, + build_store_values_extended(&ctx->ac, ctx->outputs + idx + chan, count, stride, tmp_vec); } else { temp_ptr = ctx->outputs[idx + chan + const_index * stride]; - LLVMBuildStore(ctx->builder, value, temp_ptr); + LLVMBuildStore(ctx->ac.builder, value, temp_ptr); } } break; @@ -2364,7 +3118,7 @@ visit_store_var(struct nir_to_llvm_context *ctx, if (!(writemask & (1 << chan))) continue; - value = llvm_extract_elem(ctx, src, chan); + value = llvm_extract_elem(&ctx->ac, src, chan); if (indir_index) { unsigned count = glsl_count_attribute_slots( instr->variables[0]->var->type, false); @@ -2373,36 +3127,44 @@ visit_store_var(struct nir_to_llvm_context *ctx, 
&ctx->ac, ctx->locals + idx + chan, count, 4, true); - tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec, + tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec, value, indir_index, ""); - build_store_values_extended(ctx, ctx->locals + idx + chan, + build_store_values_extended(&ctx->ac, ctx->locals + idx + chan, count, 4, tmp_vec); } else { temp_ptr = ctx->locals[idx + chan + const_index * 4]; - LLVMBuildStore(ctx->builder, value, temp_ptr); + LLVMBuildStore(ctx->ac.builder, value, temp_ptr); } } break; case nir_var_shared: { - LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32); - - if (indir_index) - indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), ""); - - for (unsigned chan = 0; chan < 8; chan++) { - if (!(writemask & (1 << chan))) - continue; - LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false); - LLVMValueRef derived_ptr; - - if (indir_index) - index = LLVMBuildAdd(ctx->builder, index, indir_index, ""); - - value = llvm_extract_elem(ctx, src, chan); - derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, ""); - LLVMBuildStore(ctx->builder, - to_integer(ctx, value), derived_ptr); + int writemask = instr->const_index[0]; + LLVMValueRef address = build_gep_for_deref(ctx, + instr->variables[0]); + LLVMValueRef val = get_src(ctx, instr->src[0]); + unsigned components = + glsl_get_vector_elements( + nir_deref_tail(&instr->variables[0]->deref)->type); + if (writemask == (1 << components) - 1) { + val = LLVMBuildBitCast( + ctx->ac.builder, val, + LLVMGetElementType(LLVMTypeOf(address)), ""); + LLVMBuildStore(ctx->ac.builder, val, address); + } else { + for (unsigned chan = 0; chan < 4; chan++) { + if (!(writemask & (1 << chan))) + continue; + LLVMValueRef ptr = + LLVMBuildStructGEP(ctx->ac.builder, + address, chan, ""); + LLVMValueRef src = llvm_extract_elem(&ctx->ac, val, + chan); + src = LLVMBuildBitCast( + ctx->ac.builder, src, + LLVMGetElementType(LLVMTypeOf(ptr)), ""); + 
LLVMBuildStore(ctx->ac.builder, src, ptr); + } } break; } @@ -2452,7 +3214,7 @@ static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array) * The sample index should be adjusted as follows: * sample_index = (fmask >> (sample_index * 4)) & 0xF; */ -static LLVMValueRef adjust_sample_index_using_fmask(struct nir_to_llvm_context *ctx, +static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx, LLVMValueRef coord_x, LLVMValueRef coord_y, LLVMValueRef coord_z, LLVMValueRef sample_index, @@ -2474,9 +3236,9 @@ static LLVMValueRef adjust_sample_index_using_fmask(struct nir_to_llvm_context * args.da = coord_z ? true : false; args.resource = fmask_desc_ptr; args.dmask = 0xf; - args.addr = ac_build_gather_values(&ctx->ac, fmask_load_address, coord_z ? 4 : 2); + args.addr = ac_build_gather_values(ctx, fmask_load_address, coord_z ? 4 : 2); - res = ac_build_image_opcode(&ctx->ac, &args); + res = ac_build_image_opcode(ctx, &args); res = to_integer(ctx, res); LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false); @@ -2484,7 +3246,7 @@ static LLVMValueRef adjust_sample_index_using_fmask(struct nir_to_llvm_context * LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder, res, - ctx->i32zero, ""); + ctx->i32_0, ""); LLVMValueRef sample_index4 = LLVMBuildMul(ctx->builder, sample_index, four, ""); @@ -2502,11 +3264,11 @@ static LLVMValueRef adjust_sample_index_using_fmask(struct nir_to_llvm_context * LLVMValueRef fmask_word1 = LLVMBuildExtractElement(ctx->builder, fmask_desc, - ctx->i32one, ""); + ctx->i32_1, ""); LLVMValueRef word1_is_nonzero = LLVMBuildICmp(ctx->builder, LLVMIntNE, - fmask_word1, ctx->i32zero, ""); + fmask_word1, ctx->i32_0, ""); /* Replace the MSAA sample index. 
*/ sample_index = @@ -2515,8 +3277,8 @@ static LLVMValueRef adjust_sample_index_using_fmask(struct nir_to_llvm_context * return sample_index; } -static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx, - nir_intrinsic_instr *instr) +static LLVMValueRef get_image_coords(struct ac_nir_context *ctx, + const nir_intrinsic_instr *instr) { const struct glsl_type *type = instr->variables[0]->var->type; if(instr->variables[0]->deref.child) @@ -2525,11 +3287,11 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx, LLVMValueRef src0 = get_src(ctx, instr->src[0]); LLVMValueRef coords[4]; LLVMValueRef masks[] = { - LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false), - LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false), + LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false), + LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false), }; LLVMValueRef res; - LLVMValueRef sample_index = llvm_extract_elem(ctx, get_src(ctx, instr->src[1]), 0); + LLVMValueRef sample_index = llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[1]), 0); int count; enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); @@ -2545,26 +3307,26 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx, LLVMValueRef fmask_load_address[3]; int chan; - fmask_load_address[0] = LLVMBuildExtractElement(ctx->builder, src0, masks[0], ""); - fmask_load_address[1] = LLVMBuildExtractElement(ctx->builder, src0, masks[1], ""); + fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], ""); + fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], ""); if (glsl_sampler_type_is_array(type)) - fmask_load_address[2] = LLVMBuildExtractElement(ctx->builder, src0, masks[2], ""); + fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], ""); else fmask_load_address[2] = NULL; if (add_frag_pos) { for (chan = 0; chan < 2; ++chan) - 
fmask_load_address[chan] = LLVMBuildAdd(ctx->builder, fmask_load_address[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), ""); + fmask_load_address[chan] = LLVMBuildAdd(ctx->ac.builder, fmask_load_address[chan], LLVMBuildFPToUI(ctx->ac.builder, ctx->nctx->frag_pos[chan], ctx->ac.i32, ""), ""); } - sample_index = adjust_sample_index_using_fmask(ctx, + sample_index = adjust_sample_index_using_fmask(&ctx->ac, fmask_load_address[0], fmask_load_address[1], fmask_load_address[2], sample_index, - get_sampler_desc(ctx, instr->variables[0], DESC_FMASK)); + get_sampler_desc(ctx, instr->variables[0], AC_DESC_FMASK, true, false)); } if (count == 1) { if (instr->src[0].ssa->num_components) - res = LLVMBuildExtractElement(ctx->builder, src0, masks[0], ""); + res = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], ""); else res = src0; } else { @@ -2572,12 +3334,12 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx, if (is_ms) count--; for (chan = 0; chan < count; ++chan) { - coords[chan] = LLVMBuildExtractElement(ctx->builder, src0, masks[chan], ""); + coords[chan] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[chan], ""); } if (add_frag_pos) { for (chan = 0; chan < count; ++chan) - coords[chan] = LLVMBuildAdd(ctx->builder, coords[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), ""); + coords[chan] = LLVMBuildAdd(ctx->ac.builder, coords[chan], LLVMBuildFPToUI(ctx->ac.builder, ctx->nctx->frag_pos[chan], ctx->ac.i32, ""), ""); } if (is_ms) { coords[count] = sample_index; @@ -2585,7 +3347,7 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx, } if (count == 3) { - coords[3] = LLVMGetUndef(ctx->i32); + coords[3] = LLVMGetUndef(ctx->ac.i32); count = 4; } res = ac_build_gather_values(&ctx->ac, coords, count); @@ -2593,47 +3355,50 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx, return res; } -static LLVMValueRef visit_image_load(struct nir_to_llvm_context 
*ctx, - nir_intrinsic_instr *instr) +static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, + const nir_intrinsic_instr *instr) { LLVMValueRef params[7]; LLVMValueRef res; char intrinsic_name[64]; const nir_variable *var = instr->variables[0]->var; const struct glsl_type *type = var->type; + LLVMValueRef i1false = LLVMConstInt(ctx->ac.i1, 0, false); + LLVMValueRef i1true = LLVMConstInt(ctx->ac.i1, 1, false); + if(instr->variables[0]->deref.child) type = instr->variables[0]->deref.child->type; type = glsl_without_array(type); if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) { - params[0] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER); - params[1] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]), - LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */ - params[2] = LLVMConstInt(ctx->i32, 0, false); /* voffset */ - params[3] = ctx->i1false; /* glc */ - params[4] = ctx->i1false; /* slc */ - res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->v4f32, + params[0] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, true, false); + params[1] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]), + ctx->ac.i32_0, ""); /* vindex */ + params[2] = ctx->ac.i32_0; /* voffset */ + params[3] = i1false; /* glc */ + params[4] = i1false; /* slc */ + res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->ac.v4f32, params, 5, 0); - res = trim_vector(ctx, res, instr->dest.ssa.num_components); - res = to_integer(ctx, res); + res = trim_vector(&ctx->ac, res, instr->dest.ssa.num_components); + res = to_integer(&ctx->ac, res); } else { bool is_da = glsl_sampler_type_is_array(type) || glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE; - LLVMValueRef da = is_da ? ctx->i1true : ctx->i1false; - LLVMValueRef glc = ctx->i1false; - LLVMValueRef slc = ctx->i1false; + LLVMValueRef da = is_da ? 
i1true : i1false; + LLVMValueRef glc = i1false; + LLVMValueRef slc = i1false; params[0] = get_image_coords(ctx, instr); - params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE); - params[2] = LLVMConstInt(ctx->i32, 15, false); /* dmask */ + params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, true, false); + params[2] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */ if (HAVE_LLVM <= 0x0309) { - params[3] = ctx->i1false; /* r128 */ + params[3] = i1false; /* r128 */ params[4] = da; params[5] = glc; params[6] = slc; } else { - LLVMValueRef lwe = ctx->i1false; + LLVMValueRef lwe = i1false; params[3] = glc; params[4] = slc; params[5] = lwe; @@ -2641,56 +3406,58 @@ static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx, } ac_get_image_intr_name("llvm.amdgcn.image.load", - ctx->v4f32, /* vdata */ + ctx->ac.v4f32, /* vdata */ LLVMTypeOf(params[0]), /* coords */ LLVMTypeOf(params[1]), /* rsrc */ intrinsic_name, sizeof(intrinsic_name)); - res = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->v4f32, + res = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.v4f32, params, 7, AC_FUNC_ATTR_READONLY); } - return to_integer(ctx, res); + return to_integer(&ctx->ac, res); } -static void visit_image_store(struct nir_to_llvm_context *ctx, +static void visit_image_store(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { LLVMValueRef params[8]; char intrinsic_name[64]; const nir_variable *var = instr->variables[0]->var; const struct glsl_type *type = glsl_without_array(var->type); - - if (ctx->stage == MESA_SHADER_FRAGMENT) - ctx->shader_info->fs.writes_memory = true; + LLVMValueRef i1false = LLVMConstInt(ctx->ac.i1, 0, false); + LLVMValueRef i1true = LLVMConstInt(ctx->ac.i1, 1, false); + LLVMValueRef glc = i1false; + bool force_glc = ctx->abi->chip_class == SI; + if (force_glc) + glc = i1true; if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) { - params[0] = to_float(ctx, get_src(ctx, instr->src[2])); /* data */ - 
params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER); - params[2] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]), - LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */ - params[3] = LLVMConstInt(ctx->i32, 0, false); /* voffset */ - params[4] = ctx->i1false; /* glc */ - params[5] = ctx->i1false; /* slc */ - ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt, + params[0] = to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */ + params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, true, true); + params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]), + ctx->ac.i32_0, ""); /* vindex */ + params[3] = ctx->ac.i32_0; /* voffset */ + params[4] = glc; /* glc */ + params[5] = i1false; /* slc */ + ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt, params, 6, 0); } else { bool is_da = glsl_sampler_type_is_array(type) || glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE; - LLVMValueRef da = is_da ? ctx->i1true : ctx->i1false; - LLVMValueRef glc = ctx->i1false; - LLVMValueRef slc = ctx->i1false; + LLVMValueRef da = is_da ? 
i1true : i1false; + LLVMValueRef slc = i1false; - params[0] = to_float(ctx, get_src(ctx, instr->src[2])); + params[0] = to_float(&ctx->ac, get_src(ctx, instr->src[2])); params[1] = get_image_coords(ctx, instr); /* coords */ - params[2] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE); - params[3] = LLVMConstInt(ctx->i32, 15, false); /* dmask */ + params[2] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, true, true); + params[3] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */ if (HAVE_LLVM <= 0x0309) { - params[4] = ctx->i1false; /* r128 */ + params[4] = i1false; /* r128 */ params[5] = da; params[6] = glc; params[7] = slc; } else { - LLVMValueRef lwe = ctx->i1false; + LLVMValueRef lwe = i1false; params[4] = glc; params[5] = slc; params[6] = lwe; @@ -2703,49 +3470,25 @@ static void visit_image_store(struct nir_to_llvm_context *ctx, LLVMTypeOf(params[2]), /* rsrc */ intrinsic_name, sizeof(intrinsic_name)); - ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->voidt, + ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.voidt, params, 8, 0); } } -static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx, - nir_intrinsic_instr *instr) +static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, + const nir_intrinsic_instr *instr) { LLVMValueRef params[6]; int param_count = 0; const nir_variable *var = instr->variables[0]->var; - const char *base_name = "llvm.amdgcn.image.atomic"; const char *atomic_name; - LLVMValueRef coords; - char intrinsic_name[32], coords_type[8]; + char intrinsic_name[41]; const struct glsl_type *type = glsl_without_array(var->type); - - if (ctx->stage == MESA_SHADER_FRAGMENT) - ctx->shader_info->fs.writes_memory = true; - - params[param_count++] = get_src(ctx, instr->src[2]); - if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap) - params[param_count++] = get_src(ctx, instr->src[3]); - - if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) { - params[param_count++] = get_sampler_desc(ctx, 
instr->variables[0], DESC_BUFFER); - coords = params[param_count++] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]), - LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */ - params[param_count++] = ctx->i32zero; /* voffset */ - params[param_count++] = ctx->i1false; /* glc */ - params[param_count++] = ctx->i1false; /* slc */ - } else { - bool da = glsl_sampler_type_is_array(type) || - glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE; - - coords = params[param_count++] = get_image_coords(ctx, instr); - params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE); - params[param_count++] = ctx->i1false; /* r128 */ - params[param_count++] = da ? ctx->i1true : ctx->i1false; /* da */ - params[param_count++] = ctx->i1false; /* slc */ - } + LLVMValueRef i1false = LLVMConstInt(ctx->ac.i1, 0, false); + LLVMValueRef i1true = LLVMConstInt(ctx->ac.i1, 1, false); + MAYBE_UNUSED int length; switch (instr->intrinsic) { case nir_intrinsic_image_atomic_add: @@ -2775,16 +3518,47 @@ static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx, default: abort(); } - build_int_type_name(LLVMTypeOf(coords), - coords_type, sizeof(coords_type)); - snprintf(intrinsic_name, sizeof(intrinsic_name), - "%s.%s.%s", base_name, atomic_name, coords_type); - return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->i32, params, param_count, 0); + if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap) + params[param_count++] = get_src(ctx, instr->src[3]); + params[param_count++] = get_src(ctx, instr->src[2]); + + if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) { + params[param_count++] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, + true, true); + params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]), + ctx->ac.i32_0, ""); /* vindex */ + params[param_count++] = ctx->ac.i32_0; /* voffset */ + params[param_count++] = i1false; /* slc */ + + length = snprintf(intrinsic_name, 
sizeof(intrinsic_name), + "llvm.amdgcn.buffer.atomic.%s", atomic_name); + } else { + char coords_type[8]; + + bool da = glsl_sampler_type_is_array(type) || + glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE; + + LLVMValueRef coords = params[param_count++] = get_image_coords(ctx, instr); + params[param_count++] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, + true, true); + params[param_count++] = i1false; /* r128 */ + params[param_count++] = da ? i1true : i1false; /* da */ + params[param_count++] = i1false; /* slc */ + + build_int_type_name(LLVMTypeOf(coords), + coords_type, sizeof(coords_type)); + + length = snprintf(intrinsic_name, sizeof(intrinsic_name), + "llvm.amdgcn.image.atomic.%s.%s", atomic_name, coords_type); + } + + assert(length < sizeof(intrinsic_name)); + return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32, params, param_count, 0); } -static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx, - nir_intrinsic_instr *instr) +static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, + const nir_intrinsic_instr *instr) { LLVMValueRef res; const nir_variable *var = instr->variables[0]->var; @@ -2795,33 +3569,40 @@ static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx, type = instr->variables[0]->deref.child->type; if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) - return get_buffer_size(ctx, get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER), true); + return get_buffer_size(ctx, + get_sampler_desc(ctx, instr->variables[0], + AC_DESC_BUFFER, true, false), true); struct ac_image_args args = { 0 }; args.da = da; args.dmask = 0xf; - args.resource = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE); + args.resource = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, true, false); args.opcode = ac_image_get_resinfo; - args.addr = ctx->i32zero; + args.addr = ctx->ac.i32_0; res = ac_build_image_opcode(&ctx->ac, &args); if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE && 
glsl_sampler_type_is_array(type)) { - LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false); - LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false); - LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, ""); - z = LLVMBuildSDiv(ctx->builder, z, six, ""); - res = LLVMBuildInsertElement(ctx->builder, res, z, two, ""); + LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false); + LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false); + LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, ""); + z = LLVMBuildSDiv(ctx->ac.builder, z, six, ""); + res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, ""); } return res; } -static void emit_waitcnt(struct nir_to_llvm_context *ctx) +#define NOOP_WAITCNT 0xf7f +#define LGKM_CNT 0x07f +#define VM_CNT 0xf70 + +static void emit_waitcnt(struct nir_to_llvm_context *ctx, + unsigned simm16) { LLVMValueRef args[1] = { - LLVMConstInt(ctx->i32, 0xf70, false), + LLVMConstInt(ctx->i32, simm16, false), }; ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.s.waitcnt", ctx->voidt, args, 1, 0); @@ -2829,19 +3610,26 @@ static void emit_waitcnt(struct nir_to_llvm_context *ctx) static void emit_barrier(struct nir_to_llvm_context *ctx) { - // TODO tess + /* SI only (thanks to a hw bug workaround): + * The real barrier instruction isn’t needed, because an entire patch + * always fits into a single wave. 
+ */ + if (ctx->options->chip_class == SI && + ctx->stage == MESA_SHADER_TESS_CTRL) { + emit_waitcnt(ctx, LGKM_CNT & VM_CNT); + return; + } ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.s.barrier", - ctx->voidt, NULL, 0, 0); + ctx->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT); } static void emit_discard_if(struct nir_to_llvm_context *ctx, - nir_intrinsic_instr *instr) + const nir_intrinsic_instr *instr) { LLVMValueRef cond; - ctx->shader_info->fs.can_discard = true; cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, - get_src(ctx, instr->src[0]), + get_src(ctx->nir, instr->src[0]), ctx->i32zero, ""); cond = LLVMBuildSelect(ctx->builder, cond, @@ -2862,15 +3650,14 @@ visit_load_local_invocation_index(struct nir_to_llvm_context *ctx) } static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx, - nir_intrinsic_instr *instr) + const nir_intrinsic_instr *instr) { LLVMValueRef ptr, result; - int idx = instr->variables[0]->var->data.driver_location; - LLVMValueRef src = get_src(ctx, instr->src[0]); - ptr = get_shared_memory_ptr(ctx, idx, ctx->i32); + LLVMValueRef src = get_src(ctx->nir, instr->src[0]); + ptr = build_gep_for_deref(ctx->nir, instr->variables[0]); if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) { - LLVMValueRef src1 = get_src(ctx, instr->src[1]); + LLVMValueRef src1 = get_src(ctx->nir, instr->src[1]); result = LLVMBuildAtomicCmpXchg(ctx->builder, ptr, src, src1, LLVMAtomicOrderingSequentiallyConsistent, @@ -2910,7 +3697,7 @@ static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx, return NULL; } - result = LLVMBuildAtomicRMW(ctx->builder, op, ptr, to_integer(ctx, src), + result = LLVMBuildAtomicRMW(ctx->builder, op, ptr, to_integer(&ctx->ac, src), LLVMAtomicOrderingSequentiallyConsistent, false); } @@ -2952,28 +3739,29 @@ static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx, static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx, LLVMValueRef sample_id) { - /* offset = sample_id * 8 (8 = 2 floats 
containing samplepos.xy) */ - LLVMValueRef offset0 = LLVMBuildMul(ctx->builder, sample_id, LLVMConstInt(ctx->i32, 8, false), ""); - LLVMValueRef offset1 = LLVMBuildAdd(ctx->builder, offset0, LLVMConstInt(ctx->i32, 4, false), ""); - LLVMValueRef result[2]; + LLVMValueRef result; + LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_PS_SAMPLE_POSITIONS, false)); - result[0] = ac_build_indexed_load_const(&ctx->ac, ctx->sample_positions, offset0); - result[1] = ac_build_indexed_load_const(&ctx->ac, ctx->sample_positions, offset1); + ptr = LLVMBuildBitCast(ctx->builder, ptr, + const_array(ctx->v2f32, 64), ""); - return ac_build_gather_values(&ctx->ac, result, 2); + sample_id = LLVMBuildAdd(ctx->builder, sample_id, ctx->sample_pos_offset, ""); + result = ac_build_indexed_load(&ctx->ac, ptr, sample_id, false); + + return result; } static LLVMValueRef load_sample_pos(struct nir_to_llvm_context *ctx) { LLVMValueRef values[2]; - values[0] = emit_ffract(ctx, ctx->frag_pos[0]); - values[1] = emit_ffract(ctx, ctx->frag_pos[1]); + values[0] = emit_ffract(&ctx->ac, ctx->frag_pos[0]); + values[1] = emit_ffract(&ctx->ac, ctx->frag_pos[1]); return ac_build_gather_values(&ctx->ac, values, 2); } static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx, - nir_intrinsic_instr *instr) + const nir_intrinsic_instr *instr) { LLVMValueRef result[2]; LLVMValueRef interp_param, attr_number; @@ -2987,19 +3775,17 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx, location = INTERP_CENTROID; break; case nir_intrinsic_interp_var_at_sample: - location = INTERP_SAMPLE; - src0 = get_src(ctx, instr->src[0]); - break; case nir_intrinsic_interp_var_at_offset: location = INTERP_CENTER; - src0 = get_src(ctx, instr->src[0]); + src0 = get_src(ctx->nir, instr->src[0]); + break; default: break; } if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) { - src_c0 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, "")); - 
src_c1 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, "")); + src_c0 = to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, "")); + src_c1 = to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, "")); } else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) { LLVMValueRef sample_position; LLVMValueRef halfval = LLVMConstReal(ctx->f32, 0.5f); @@ -3080,12 +3866,12 @@ static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx, static void visit_emit_vertex(struct nir_to_llvm_context *ctx, - nir_intrinsic_instr *instr) + const nir_intrinsic_instr *instr) { LLVMValueRef gs_next_vertex; LLVMValueRef can_emit, kill; int idx; - int clip_cull_slot = -1; + assert(instr->const_index[0] == 0); /* Write vertex attribute values to GSVS ring */ gs_next_vertex = LLVMBuildLoad(ctx->builder, @@ -3108,41 +3894,24 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx, /* loop num outputs */ idx = 0; for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { - LLVMValueRef *out_ptr = &ctx->outputs[i * 4]; + LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4]; int length = 4; - int start = 0; int slot = idx; int slot_inc = 1; if (!(ctx->output_mask & (1ull << i))) continue; - if (i == VARYING_SLOT_CLIP_DIST1 || - i == VARYING_SLOT_CULL_DIST1) - continue; - - if (i == VARYING_SLOT_CLIP_DIST0 || - i == VARYING_SLOT_CULL_DIST0) { + if (i == VARYING_SLOT_CLIP_DIST0) { /* pack clip and cull into a single set of slots */ - if (clip_cull_slot == -1) { - clip_cull_slot = idx; - if (ctx->num_output_clips + ctx->num_output_culls > 4) - slot_inc = 2; - } else { - slot = clip_cull_slot; - slot_inc = 0; - } - if (i == VARYING_SLOT_CLIP_DIST0) - length = ctx->num_output_clips; - if (i == VARYING_SLOT_CULL_DIST0) { - start = ctx->num_output_clips; - length = ctx->num_output_culls; - } + length = ctx->num_output_clips + ctx->num_output_culls; + if (length > 4) + slot_inc = 2; } for (unsigned j = 0; j < length; j++) { 
LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, out_ptr[j], ""); - LLVMValueRef voffset = LLVMConstInt(ctx->i32, (slot * 4 + j + start) * ctx->gs_max_out_vertices, false); + LLVMValueRef voffset = LLVMConstInt(ctx->i32, (slot * 4 + j) * ctx->gs_max_out_vertices, false); voffset = LLVMBuildAdd(ctx->builder, voffset, gs_next_vertex, ""); voffset = LLVMBuildMul(ctx->builder, voffset, LLVMConstInt(ctx->i32, 4, false), ""); @@ -3165,84 +3934,112 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx, static void visit_end_primitive(struct nir_to_llvm_context *ctx, - nir_intrinsic_instr *instr) + const nir_intrinsic_instr *instr) { ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (0 << 8), ctx->gs_wave_id); } -static void visit_intrinsic(struct nir_to_llvm_context *ctx, +static LLVMValueRef +visit_load_tess_coord(struct nir_to_llvm_context *ctx, + const nir_intrinsic_instr *instr) +{ + LLVMValueRef coord[4] = { + ctx->tes_u, + ctx->tes_v, + ctx->f32zero, + ctx->f32zero, + }; + + if (ctx->tes_primitive_mode == GL_TRIANGLES) + coord[2] = LLVMBuildFSub(ctx->builder, ctx->f32one, + LLVMBuildFAdd(ctx->builder, coord[0], coord[1], ""), ""); + + LLVMValueRef result = ac_build_gather_values(&ctx->ac, coord, instr->num_components); + return LLVMBuildBitCast(ctx->builder, result, + get_def_type(ctx->nir, &instr->dest.ssa), ""); +} + +static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *instr) { LLVMValueRef result = NULL; switch (instr->intrinsic) { case nir_intrinsic_load_work_group_id: { - result = ctx->workgroup_ids; + result = ctx->nctx->workgroup_ids; break; } case nir_intrinsic_load_base_vertex: { - result = ctx->base_vertex; + result = ctx->abi->base_vertex; break; } case nir_intrinsic_load_vertex_id_zero_base: { - result = ctx->vertex_id; + result = ctx->abi->vertex_id; break; } case nir_intrinsic_load_local_invocation_id: { - result = ctx->local_invocation_ids; + result = ctx->nctx->local_invocation_ids; break; } case 
nir_intrinsic_load_base_instance: - result = ctx->start_instance; + result = ctx->abi->start_instance; break; case nir_intrinsic_load_draw_id: - result = ctx->draw_index; + result = ctx->abi->draw_id; break; case nir_intrinsic_load_invocation_id: - result = ctx->gs_invocation_id; + if (ctx->stage == MESA_SHADER_TESS_CTRL) + result = unpack_param(ctx->nctx, ctx->nctx->tcs_rel_ids, 8, 5); + else + result = ctx->nctx->gs_invocation_id; break; case nir_intrinsic_load_primitive_id: - if (ctx->stage == MESA_SHADER_GEOMETRY) - result = ctx->gs_prim_id; - else + if (ctx->stage == MESA_SHADER_GEOMETRY) { + ctx->nctx->shader_info->gs.uses_prim_id = true; + result = ctx->nctx->gs_prim_id; + } else if (ctx->stage == MESA_SHADER_TESS_CTRL) { + ctx->nctx->shader_info->tcs.uses_prim_id = true; + result = ctx->nctx->tcs_patch_id; + } else if (ctx->stage == MESA_SHADER_TESS_EVAL) { + ctx->nctx->shader_info->tcs.uses_prim_id = true; + result = ctx->nctx->tes_patch_id; + } else fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage); break; case nir_intrinsic_load_sample_id: - ctx->shader_info->fs.force_persample = true; - result = unpack_param(ctx, ctx->ancillary, 8, 4); + ctx->nctx->shader_info->fs.force_persample = true; + result = unpack_param(ctx->nctx, ctx->nctx->ancillary, 8, 4); break; case nir_intrinsic_load_sample_pos: - ctx->shader_info->fs.force_persample = true; - result = load_sample_pos(ctx); + ctx->nctx->shader_info->fs.force_persample = true; + result = load_sample_pos(ctx->nctx); break; case nir_intrinsic_load_sample_mask_in: - result = ctx->sample_coverage; + result = ctx->nctx->sample_coverage; break; case nir_intrinsic_load_front_face: - result = ctx->front_face; + result = ctx->nctx->front_face; break; case nir_intrinsic_load_instance_id: - result = ctx->instance_id; - ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3, - ctx->shader_info->vs.vgpr_comp_cnt); + result = ctx->abi->instance_id; break; case nir_intrinsic_load_num_work_groups: - result = 
ctx->num_work_groups; + result = ctx->nctx->num_work_groups; break; case nir_intrinsic_load_local_invocation_index: - result = visit_load_local_invocation_index(ctx); + result = visit_load_local_invocation_index(ctx->nctx); break; case nir_intrinsic_load_push_constant: - result = visit_load_push_constant(ctx, instr); + result = visit_load_push_constant(ctx->nctx, instr); break; case nir_intrinsic_vulkan_resource_index: - result = visit_vulkan_resource_index(ctx, instr); + result = visit_vulkan_resource_index(ctx->nctx, instr); break; case nir_intrinsic_store_ssbo: - visit_store_ssbo(ctx, instr); + visit_store_ssbo(ctx->nctx, instr); break; case nir_intrinsic_load_ssbo: - result = visit_load_buffer(ctx, instr); + result = visit_load_buffer(ctx->nctx, instr); break; case nir_intrinsic_ssbo_atomic_add: case nir_intrinsic_ssbo_atomic_imin: @@ -3254,7 +4051,7 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx, case nir_intrinsic_ssbo_atomic_xor: case nir_intrinsic_ssbo_atomic_exchange: case nir_intrinsic_ssbo_atomic_comp_swap: - result = visit_atomic_ssbo(ctx, instr); + result = visit_atomic_ssbo(ctx->nctx, instr); break; case nir_intrinsic_load_ubo: result = visit_load_ubo_buffer(ctx, instr); @@ -3288,19 +4085,18 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx, result = visit_image_size(ctx, instr); break; case nir_intrinsic_discard: - ctx->shader_info->fs.can_discard = true; ac_build_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp", - ctx->voidt, + LLVMVoidTypeInContext(ctx->ac.context), NULL, 0, AC_FUNC_ATTR_LEGACY); break; case nir_intrinsic_discard_if: - emit_discard_if(ctx, instr); + emit_discard_if(ctx->nctx, instr); break; case nir_intrinsic_memory_barrier: - emit_waitcnt(ctx); + emit_waitcnt(ctx->nctx, VM_CNT); break; case nir_intrinsic_barrier: - emit_barrier(ctx); + emit_barrier(ctx->nctx); break; case nir_intrinsic_var_atomic_add: case nir_intrinsic_var_atomic_imin: @@ -3312,18 +4108,24 @@ static void visit_intrinsic(struct 
nir_to_llvm_context *ctx, case nir_intrinsic_var_atomic_xor: case nir_intrinsic_var_atomic_exchange: case nir_intrinsic_var_atomic_comp_swap: - result = visit_var_atomic(ctx, instr); + result = visit_var_atomic(ctx->nctx, instr); break; case nir_intrinsic_interp_var_at_centroid: case nir_intrinsic_interp_var_at_sample: case nir_intrinsic_interp_var_at_offset: - result = visit_interp(ctx, instr); + result = visit_interp(ctx->nctx, instr); break; case nir_intrinsic_emit_vertex: - visit_emit_vertex(ctx, instr); + visit_emit_vertex(ctx->nctx, instr); break; case nir_intrinsic_end_primitive: - visit_end_primitive(ctx, instr); + visit_end_primitive(ctx->nctx, instr); + break; + case nir_intrinsic_load_tess_coord: + result = visit_load_tess_coord(ctx->nctx, instr); + break; + case nir_intrinsic_load_patch_vertices_in: + result = LLVMConstInt(ctx->ac.i32, ctx->nctx->options->key.tcs.input_vertices, false); break; default: fprintf(stderr, "Unknown intrinsic: "); @@ -3336,42 +4138,47 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx, } } -static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx, - nir_deref_var *deref, - enum desc_type desc_type) +static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi, + unsigned descriptor_set, + unsigned base_index, + unsigned constant_index, + LLVMValueRef index, + enum ac_descriptor_type desc_type, + bool image, bool write) { - unsigned desc_set = deref->var->data.descriptor_set; - LLVMValueRef list = ctx->descriptor_sets[desc_set]; - struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout; - struct radv_descriptor_set_binding_layout *binding = layout->binding + deref->var->data.binding; + struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); + LLVMValueRef list = ctx->descriptor_sets[descriptor_set]; + struct radv_descriptor_set_layout *layout = ctx->options->layout->set[descriptor_set].layout; + struct radv_descriptor_set_binding_layout *binding = 
layout->binding + base_index; unsigned offset = binding->offset; unsigned stride = binding->size; unsigned type_size; LLVMBuilderRef builder = ctx->builder; LLVMTypeRef type; - LLVMValueRef index = NULL; - unsigned constant_index = 0; - assert(deref->var->data.binding < layout->binding_count); + assert(base_index < layout->binding_count); + + if (write && ctx->stage == MESA_SHADER_FRAGMENT) + ctx->shader_info->fs.writes_memory = true; switch (desc_type) { - case DESC_IMAGE: + case AC_DESC_IMAGE: type = ctx->v8i32; type_size = 32; break; - case DESC_FMASK: + case AC_DESC_FMASK: type = ctx->v8i32; offset += 32; type_size = 32; break; - case DESC_SAMPLER: + case AC_DESC_SAMPLER: type = ctx->v4i32; if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) offset += 64; type_size = 16; break; - case DESC_BUFFER: + case AC_DESC_BUFFER: type = ctx->v4i32; type_size = 16; break; @@ -3379,27 +4186,20 @@ static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx, unreachable("invalid desc_type\n"); } - if (deref->deref.child) { - nir_deref_array *child = (nir_deref_array*)deref->deref.child; - - assert(child->deref_array_type != nir_deref_array_type_wildcard); - offset += child->base_offset * stride; - if (child->deref_array_type == nir_deref_array_type_indirect) { - index = get_src(ctx, child->indirect); - } + offset += constant_index * stride; - constant_index = child->base_offset; - } - if (desc_type == DESC_SAMPLER && binding->immutable_samplers && + if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset && (!index || binding->immutable_samplers_equal)) { if (binding->immutable_samplers_equal) constant_index = 0; + const uint32_t *samplers = radv_immutable_samplers(layout, binding); + LLVMValueRef constants[] = { - LLVMConstInt(ctx->i32, binding->immutable_samplers[constant_index * 4 + 0], 0), - LLVMConstInt(ctx->i32, binding->immutable_samplers[constant_index * 4 + 1], 0), - LLVMConstInt(ctx->i32, 
binding->immutable_samplers[constant_index * 4 + 2], 0), - LLVMConstInt(ctx->i32, binding->immutable_samplers[constant_index * 4 + 3], 0), + LLVMConstInt(ctx->i32, samplers[constant_index * 4 + 0], 0), + LLVMConstInt(ctx->i32, samplers[constant_index * 4 + 1], 0), + LLVMConstInt(ctx->i32, samplers[constant_index * 4 + 2], 0), + LLVMConstInt(ctx->i32, samplers[constant_index * 4 + 3], 0), }; return ac_build_gather_values(&ctx->ac, constants, 4); } @@ -3417,9 +4217,51 @@ static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx, return ac_build_indexed_load_const(&ctx->ac, list, index); } -static void set_tex_fetch_args(struct nir_to_llvm_context *ctx, +static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, + const nir_deref_var *deref, + enum ac_descriptor_type desc_type, + bool image, bool write) +{ + LLVMValueRef index = NULL; + unsigned constant_index = 0; + const nir_deref *tail = &deref->deref; + + while (tail->child) { + const nir_deref_array *child = nir_deref_as_array(tail->child); + unsigned array_size = glsl_get_aoa_size(tail->child->type); + + if (!array_size) + array_size = 1; + + assert(child->deref_array_type != nir_deref_array_type_wildcard); + + if (child->deref_array_type == nir_deref_array_type_indirect) { + LLVMValueRef indirect = get_src(ctx, child->indirect); + + indirect = LLVMBuildMul(ctx->ac.builder, indirect, + LLVMConstInt(ctx->ac.i32, array_size, false), ""); + + if (!index) + index = indirect; + else + index = LLVMBuildAdd(ctx->ac.builder, index, indirect, ""); + } + + constant_index += child->base_offset * array_size; + + tail = &child->deref; + } + + return ctx->abi->load_sampler_desc(ctx->abi, + deref->var->data.descriptor_set, + deref->var->data.binding, + constant_index, index, + desc_type, image, write); +} + +static void set_tex_fetch_args(struct ac_llvm_context *ctx, struct ac_image_args *args, - nir_tex_instr *instr, + const nir_tex_instr *instr, nir_texop op, LLVMValueRef res_ptr, LLVMValueRef samp_ptr, 
LLVMValueRef *param, unsigned count, @@ -3435,7 +4277,7 @@ static void set_tex_fetch_args(struct nir_to_llvm_context *ctx, param[count++] = LLVMGetUndef(ctx->i32); if (count > 1) - args->addr = ac_build_gather_values(&ctx->ac, param, count); + args->addr = ac_build_gather_values(ctx, param, count); else args->addr = param[0]; @@ -3463,56 +4305,56 @@ static void set_tex_fetch_args(struct nir_to_llvm_context *ctx, * VI: * The ANISO_OVERRIDE sampler field enables this fix in TA. */ -static LLVMValueRef sici_fix_sampler_aniso(struct nir_to_llvm_context *ctx, +static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx, LLVMValueRef res, LLVMValueRef samp) { - LLVMBuilderRef builder = ctx->builder; + LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef img7, samp0; - if (ctx->options->chip_class >= VI) + if (ctx->abi->chip_class >= VI) return samp; img7 = LLVMBuildExtractElement(builder, res, - LLVMConstInt(ctx->i32, 7, 0), ""); + LLVMConstInt(ctx->ac.i32, 7, 0), ""); samp0 = LLVMBuildExtractElement(builder, samp, - LLVMConstInt(ctx->i32, 0, 0), ""); + LLVMConstInt(ctx->ac.i32, 0, 0), ""); samp0 = LLVMBuildAnd(builder, samp0, img7, ""); return LLVMBuildInsertElement(builder, samp, samp0, - LLVMConstInt(ctx->i32, 0, 0), ""); + LLVMConstInt(ctx->ac.i32, 0, 0), ""); } -static void tex_fetch_ptrs(struct nir_to_llvm_context *ctx, +static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr, LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr) { if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) - *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_BUFFER); + *res_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_BUFFER, false, false); else - *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_IMAGE); + *res_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_IMAGE, false, false); if (samp_ptr) { if (instr->sampler) - *samp_ptr = get_sampler_desc(ctx, instr->sampler, DESC_SAMPLER); + *samp_ptr = get_sampler_desc(ctx, 
instr->sampler, AC_DESC_SAMPLER, false, false); else - *samp_ptr = get_sampler_desc(ctx, instr->texture, DESC_SAMPLER); + *samp_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_SAMPLER, false, false); if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT) *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr); } if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms || instr->op == nir_texop_samples_identical)) - *fmask_ptr = get_sampler_desc(ctx, instr->texture, DESC_FMASK); + *fmask_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_FMASK, false, false); } -static LLVMValueRef apply_round_slice(struct nir_to_llvm_context *ctx, +static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx, LLVMValueRef coord) { coord = to_float(ctx, coord); - coord = ac_build_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, &coord, 1, 0); + coord = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &coord, 1, 0); coord = to_integer(ctx, coord); return coord; } -static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) +static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) { LLVMValueRef result = NULL; struct ac_image_args args = { 0 }; @@ -3526,6 +4368,7 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) LLVMValueRef derivs[6]; unsigned chan, count = 0; unsigned const_src = 0, num_deriv_comp = 0; + bool lod_is_zero = false; tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr); @@ -3546,9 +4389,14 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) case nir_tex_src_bias: bias = get_src(ctx, instr->src[i].src); break; - case nir_tex_src_lod: + case nir_tex_src_lod: { + nir_const_value *val = nir_src_as_const_value(instr->src[i].src); + + if (val && val->i32[0] == 0) + lod_is_zero = true; lod = get_src(ctx, instr->src[i].src); break; + } case nir_tex_src_ms_index: sample_index = get_src(ctx, instr->src[i].src); break; @@ -3576,48 +4424,48 @@ static void 
visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) if (instr->op == nir_texop_texture_samples) { LLVMValueRef res, samples, is_msaa; - res = LLVMBuildBitCast(ctx->builder, res_ptr, ctx->v8i32, ""); - samples = LLVMBuildExtractElement(ctx->builder, res, - LLVMConstInt(ctx->i32, 3, false), ""); - is_msaa = LLVMBuildLShr(ctx->builder, samples, - LLVMConstInt(ctx->i32, 28, false), ""); - is_msaa = LLVMBuildAnd(ctx->builder, is_msaa, - LLVMConstInt(ctx->i32, 0xe, false), ""); - is_msaa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, is_msaa, - LLVMConstInt(ctx->i32, 0xe, false), ""); - - samples = LLVMBuildLShr(ctx->builder, samples, - LLVMConstInt(ctx->i32, 16, false), ""); - samples = LLVMBuildAnd(ctx->builder, samples, - LLVMConstInt(ctx->i32, 0xf, false), ""); - samples = LLVMBuildShl(ctx->builder, ctx->i32one, + res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->ac.v8i32, ""); + samples = LLVMBuildExtractElement(ctx->ac.builder, res, + LLVMConstInt(ctx->ac.i32, 3, false), ""); + is_msaa = LLVMBuildLShr(ctx->ac.builder, samples, + LLVMConstInt(ctx->ac.i32, 28, false), ""); + is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa, + LLVMConstInt(ctx->ac.i32, 0xe, false), ""); + is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa, + LLVMConstInt(ctx->ac.i32, 0xe, false), ""); + + samples = LLVMBuildLShr(ctx->ac.builder, samples, + LLVMConstInt(ctx->ac.i32, 16, false), ""); + samples = LLVMBuildAnd(ctx->ac.builder, samples, + LLVMConstInt(ctx->ac.i32, 0xf, false), ""); + samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1, samples, ""); - samples = LLVMBuildSelect(ctx->builder, is_msaa, samples, - ctx->i32one, ""); + samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples, + ctx->ac.i32_1, ""); result = samples; goto write_result; } if (coord) for (chan = 0; chan < instr->coord_components; chan++) - coords[chan] = llvm_extract_elem(ctx, coord, chan); + coords[chan] = llvm_extract_elem(&ctx->ac, coord, chan); if (offsets && instr->op != nir_texop_txf) { 
LLVMValueRef offset[3], pack; for (chan = 0; chan < 3; ++chan) - offset[chan] = ctx->i32zero; + offset[chan] = ctx->ac.i32_0; args.offset = true; for (chan = 0; chan < get_llvm_num_components(offsets); chan++) { - offset[chan] = llvm_extract_elem(ctx, offsets, chan); - offset[chan] = LLVMBuildAnd(ctx->builder, offset[chan], - LLVMConstInt(ctx->i32, 0x3f, false), ""); + offset[chan] = llvm_extract_elem(&ctx->ac, offsets, chan); + offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan], + LLVMConstInt(ctx->ac.i32, 0x3f, false), ""); if (chan) - offset[chan] = LLVMBuildShl(ctx->builder, offset[chan], - LLVMConstInt(ctx->i32, chan * 8, false), ""); + offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan], + LLVMConstInt(ctx->ac.i32, chan * 8, false), ""); } - pack = LLVMBuildOr(ctx->builder, offset[0], offset[1], ""); - pack = LLVMBuildOr(ctx->builder, pack, offset[2], ""); + pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], ""); + pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], ""); address[count++] = pack; } @@ -3628,7 +4476,7 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) /* Pack depth comparison value */ if (instr->is_shadow && comparator) { - address[count++] = llvm_extract_elem(ctx, comparator, 0); + address[count++] = llvm_extract_elem(&ctx->ac, comparator, 0); } /* pack derivatives */ @@ -3648,16 +4496,18 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) } for (unsigned i = 0; i < num_deriv_comp; i++) { - derivs[i * 2] = to_float(ctx, llvm_extract_elem(ctx, ddx, i)); - derivs[i * 2 + 1] = to_float(ctx, llvm_extract_elem(ctx, ddy, i)); + derivs[i] = to_float(&ctx->ac, llvm_extract_elem(&ctx->ac, ddx, i)); + derivs[num_deriv_comp + i] = to_float(&ctx->ac, llvm_extract_elem(&ctx->ac, ddy, i)); } } if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) { + if (instr->is_array && instr->op != nir_texop_lod) + coords[3] = apply_round_slice(&ctx->ac, coords[3]); for (chan = 0; chan < 
instr->coord_components; chan++) - coords[chan] = to_float(ctx, coords[chan]); + coords[chan] = to_float(&ctx->ac, coords[chan]); if (instr->coord_components == 3) - coords[3] = LLVMGetUndef(ctx->f32); + coords[3] = LLVMGetUndef(ctx->ac.f32); ac_prepare_cube_coords(&ctx->ac, instr->op == nir_texop_txd, instr->is_array, coords, derivs); @@ -3675,21 +4525,24 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) address[count++] = coords[0]; if (instr->coord_components > 1) { if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) { - coords[1] = apply_round_slice(ctx, coords[1]); + coords[1] = apply_round_slice(&ctx->ac, coords[1]); } address[count++] = coords[1]; } if (instr->coord_components > 2) { /* This seems like a bit of a hack - but it passes Vulkan CTS with it */ - if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D && instr->op != nir_texop_txf) { - coords[2] = apply_round_slice(ctx, coords[2]); + if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D && + instr->sampler_dim != GLSL_SAMPLER_DIM_CUBE && + instr->op != nir_texop_txf) { + coords[2] = apply_round_slice(&ctx->ac, coords[2]); } address[count++] = coords[2]; } } /* Pack LOD */ - if ((instr->op == nir_texop_txl || instr->op == nir_texop_txf) && lod) { + if (lod && ((instr->op == nir_texop_txl && !lod_is_zero) || + instr->op == nir_texop_txf)) { address[count++] = lod; } else if (instr->op == nir_texop_txf_ms && sample_index) { address[count++] = sample_index; @@ -3698,12 +4551,12 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) if (lod) address[count++] = lod; else - address[count++] = ctx->i32zero; + address[count++] = ctx->ac.i32_0; } for (chan = 0; chan < count; chan++) { - address[chan] = LLVMBuildBitCast(ctx->builder, - address[chan], ctx->i32, ""); + address[chan] = LLVMBuildBitCast(ctx->ac.builder, + address[chan], ctx->ac.i32, ""); } if (instr->op == nir_texop_samples_identical) { @@ -3713,24 +4566,24 @@ 
static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) memcpy(txf_address, address, sizeof(txf_address)); if (!instr->is_array) - txf_address[2] = ctx->i32zero; - txf_address[3] = ctx->i32zero; + txf_address[2] = ctx->ac.i32_0; + txf_address[3] = ctx->ac.i32_0; - set_tex_fetch_args(ctx, &txf_args, instr, nir_texop_txf, + set_tex_fetch_args(&ctx->ac, &txf_args, instr, nir_texop_txf, fmask_ptr, NULL, txf_address, txf_count, 0xf); - result = build_tex_intrinsic(ctx, instr, &txf_args); + result = build_tex_intrinsic(ctx, instr, false, &txf_args); - result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, ""); - result = emit_int_cmp(ctx, LLVMIntEQ, result, ctx->i32zero); + result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); + result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0); goto write_result; } if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS && instr->op != nir_texop_txs) { unsigned sample_chan = instr->is_array ? 3 : 2; - address[sample_chan] = adjust_sample_index_using_fmask(ctx, + address[sample_chan] = adjust_sample_index_using_fmask(&ctx->ac, address[0], address[1], instr->is_array ? 
address[2] : NULL, @@ -3745,13 +4598,13 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) assert(const_offset); num_offsets = MIN2(num_offsets, instr->coord_components); if (num_offsets > 2) - address[2] = LLVMBuildAdd(ctx->builder, - address[2], LLVMConstInt(ctx->i32, const_offset->i32[2], false), ""); + address[2] = LLVMBuildAdd(ctx->ac.builder, + address[2], LLVMConstInt(ctx->ac.i32, const_offset->i32[2], false), ""); if (num_offsets > 1) - address[1] = LLVMBuildAdd(ctx->builder, - address[1], LLVMConstInt(ctx->i32, const_offset->i32[1], false), ""); - address[0] = LLVMBuildAdd(ctx->builder, - address[0], LLVMConstInt(ctx->i32, const_offset->i32[0], false), ""); + address[1] = LLVMBuildAdd(ctx->ac.builder, + address[1], LLVMConstInt(ctx->ac.i32, const_offset->i32[1], false), ""); + address[0] = LLVMBuildAdd(ctx->ac.builder, + address[0], LLVMConstInt(ctx->ac.i32, const_offset->i32[0], false), ""); } @@ -3762,45 +4615,47 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr) else dmask = 1 << instr->component; } - set_tex_fetch_args(ctx, &args, instr, instr->op, + set_tex_fetch_args(&ctx->ac, &args, instr, instr->op, res_ptr, samp_ptr, address, count, dmask); - result = build_tex_intrinsic(ctx, instr, &args); + result = build_tex_intrinsic(ctx, instr, lod_is_zero, &args); if (instr->op == nir_texop_query_levels) - result = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, 3, false), ""); - else if (instr->is_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod && instr->op != nir_texop_tg4) - result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, ""); + result = LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), ""); + else if (instr->is_shadow && instr->is_new_style_shadow && + instr->op != nir_texop_txs && instr->op != nir_texop_lod && + instr->op != nir_texop_tg4) + result = LLVMBuildExtractElement(ctx->ac.builder, result, 
ctx->ac.i32_0, ""); else if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && instr->is_array) { - LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false); - LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false); - LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, result, two, ""); - z = LLVMBuildSDiv(ctx->builder, z, six, ""); - result = LLVMBuildInsertElement(ctx->builder, result, z, two, ""); + LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false); + LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false); + LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, ""); + z = LLVMBuildSDiv(ctx->ac.builder, z, six, ""); + result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, ""); } else if (instr->dest.ssa.num_components != 4) - result = trim_vector(ctx, result, instr->dest.ssa.num_components); + result = trim_vector(&ctx->ac, result, instr->dest.ssa.num_components); write_result: if (result) { assert(instr->dest.is_ssa); - result = to_integer(ctx, result); + result = to_integer(&ctx->ac, result); _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result); } } -static void visit_phi(struct nir_to_llvm_context *ctx, nir_phi_instr *instr) +static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr) { LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa); - LLVMValueRef result = LLVMBuildPhi(ctx->builder, type, ""); + LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, ""); _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result); _mesa_hash_table_insert(ctx->phis, instr, result); } -static void visit_post_phi(struct nir_to_llvm_context *ctx, +static void visit_post_phi(struct ac_nir_context *ctx, nir_phi_instr *instr, LLVMValueRef llvm_phi) { @@ -3812,7 +4667,7 @@ static void visit_post_phi(struct nir_to_llvm_context *ctx, } } -static void phi_post_pass(struct nir_to_llvm_context *ctx) +static void phi_post_pass(struct ac_nir_context *ctx) { struct hash_entry *entry; 
hash_table_foreach(ctx->phis, entry) { @@ -3822,31 +4677,31 @@ static void phi_post_pass(struct nir_to_llvm_context *ctx) } -static void visit_ssa_undef(struct nir_to_llvm_context *ctx, - nir_ssa_undef_instr *instr) +static void visit_ssa_undef(struct ac_nir_context *ctx, + const nir_ssa_undef_instr *instr) { unsigned num_components = instr->def.num_components; LLVMValueRef undef; if (num_components == 1) - undef = LLVMGetUndef(ctx->i32); + undef = LLVMGetUndef(ctx->ac.i32); else { - undef = LLVMGetUndef(LLVMVectorType(ctx->i32, num_components)); + undef = LLVMGetUndef(LLVMVectorType(ctx->ac.i32, num_components)); } _mesa_hash_table_insert(ctx->defs, &instr->def, undef); } -static void visit_jump(struct nir_to_llvm_context *ctx, - nir_jump_instr *instr) +static void visit_jump(struct ac_nir_context *ctx, + const nir_jump_instr *instr) { switch (instr->type) { case nir_jump_break: - LLVMBuildBr(ctx->builder, ctx->break_block); - LLVMClearInsertionPosition(ctx->builder); + LLVMBuildBr(ctx->ac.builder, ctx->break_block); + LLVMClearInsertionPosition(ctx->ac.builder); break; case nir_jump_continue: - LLVMBuildBr(ctx->builder, ctx->continue_block); - LLVMClearInsertionPosition(ctx->builder); + LLVMBuildBr(ctx->ac.builder, ctx->continue_block); + LLVMClearInsertionPosition(ctx->ac.builder); break; default: fprintf(stderr, "Unknown NIR jump instr: "); @@ -3856,12 +4711,12 @@ static void visit_jump(struct nir_to_llvm_context *ctx, } } -static void visit_cf_list(struct nir_to_llvm_context *ctx, +static void visit_cf_list(struct ac_nir_context *ctx, struct exec_list *list); -static void visit_block(struct nir_to_llvm_context *ctx, nir_block *block) +static void visit_block(struct ac_nir_context *ctx, nir_block *block) { - LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->builder); + LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->ac.builder); nir_foreach_instr(instr, block) { switch (instr->type) { @@ -3897,61 +4752,63 @@ static void visit_block(struct 
nir_to_llvm_context *ctx, nir_block *block) _mesa_hash_table_insert(ctx->defs, block, llvm_block); } -static void visit_if(struct nir_to_llvm_context *ctx, nir_if *if_stmt) +static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt) { LLVMValueRef value = get_src(ctx, if_stmt->condition); + LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder)); LLVMBasicBlockRef merge_block = - LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, ""); + LLVMAppendBasicBlockInContext(ctx->ac.context, fn, ""); LLVMBasicBlockRef if_block = - LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, ""); + LLVMAppendBasicBlockInContext(ctx->ac.context, fn, ""); LLVMBasicBlockRef else_block = merge_block; if (!exec_list_is_empty(&if_stmt->else_list)) else_block = LLVMAppendBasicBlockInContext( - ctx->context, ctx->main_function, ""); + ctx->ac.context, fn, ""); - LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, value, - LLVMConstInt(ctx->i32, 0, false), ""); - LLVMBuildCondBr(ctx->builder, cond, if_block, else_block); + LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, value, + LLVMConstInt(ctx->ac.i32, 0, false), ""); + LLVMBuildCondBr(ctx->ac.builder, cond, if_block, else_block); - LLVMPositionBuilderAtEnd(ctx->builder, if_block); + LLVMPositionBuilderAtEnd(ctx->ac.builder, if_block); visit_cf_list(ctx, &if_stmt->then_list); - if (LLVMGetInsertBlock(ctx->builder)) - LLVMBuildBr(ctx->builder, merge_block); + if (LLVMGetInsertBlock(ctx->ac.builder)) + LLVMBuildBr(ctx->ac.builder, merge_block); if (!exec_list_is_empty(&if_stmt->else_list)) { - LLVMPositionBuilderAtEnd(ctx->builder, else_block); + LLVMPositionBuilderAtEnd(ctx->ac.builder, else_block); visit_cf_list(ctx, &if_stmt->else_list); - if (LLVMGetInsertBlock(ctx->builder)) - LLVMBuildBr(ctx->builder, merge_block); + if (LLVMGetInsertBlock(ctx->ac.builder)) + LLVMBuildBr(ctx->ac.builder, merge_block); } - LLVMPositionBuilderAtEnd(ctx->builder, merge_block); + 
LLVMPositionBuilderAtEnd(ctx->ac.builder, merge_block); } -static void visit_loop(struct nir_to_llvm_context *ctx, nir_loop *loop) +static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop) { + LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder)); LLVMBasicBlockRef continue_parent = ctx->continue_block; LLVMBasicBlockRef break_parent = ctx->break_block; ctx->continue_block = - LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, ""); + LLVMAppendBasicBlockInContext(ctx->ac.context, fn, ""); ctx->break_block = - LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, ""); + LLVMAppendBasicBlockInContext(ctx->ac.context, fn, ""); - LLVMBuildBr(ctx->builder, ctx->continue_block); - LLVMPositionBuilderAtEnd(ctx->builder, ctx->continue_block); + LLVMBuildBr(ctx->ac.builder, ctx->continue_block); + LLVMPositionBuilderAtEnd(ctx->ac.builder, ctx->continue_block); visit_cf_list(ctx, &loop->body); - if (LLVMGetInsertBlock(ctx->builder)) - LLVMBuildBr(ctx->builder, ctx->continue_block); - LLVMPositionBuilderAtEnd(ctx->builder, ctx->break_block); + if (LLVMGetInsertBlock(ctx->ac.builder)) + LLVMBuildBr(ctx->ac.builder, ctx->continue_block); + LLVMPositionBuilderAtEnd(ctx->ac.builder, ctx->break_block); ctx->continue_block = continue_parent; ctx->break_block = break_parent; } -static void visit_cf_list(struct nir_to_llvm_context *ctx, +static void visit_cf_list(struct ac_nir_context *ctx, struct exec_list *list) { foreach_list_typed(nir_cf_node, node, node, list) @@ -3982,7 +4839,6 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx, LLVMValueRef t_list_ptr = ctx->vertex_buffers; LLVMValueRef t_offset; LLVMValueRef t_list; - LLVMValueRef args[3]; LLVMValueRef input; LLVMValueRef buffer_index; int index = variable->data.location - VERT_ATTRIB_GENERIC0; @@ -3992,51 +4848,33 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx, variable->data.driver_location = idx * 4; if (ctx->options->key.vs.instance_rate_inputs & (1u 
<< index)) { - buffer_index = LLVMBuildAdd(ctx->builder, ctx->instance_id, - ctx->start_instance, ""); + buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.instance_id, + ctx->abi.start_instance, ""); ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3, ctx->shader_info->vs.vgpr_comp_cnt); } else - buffer_index = LLVMBuildAdd(ctx->builder, ctx->vertex_id, - ctx->base_vertex, ""); + buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.vertex_id, + ctx->abi.base_vertex, ""); for (unsigned i = 0; i < attrib_count; ++i, ++idx) { t_offset = LLVMConstInt(ctx->i32, index + i, false); t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset); - args[0] = t_list; - args[1] = LLVMConstInt(ctx->i32, 0, false); - args[2] = buffer_index; - input = ac_build_intrinsic(&ctx->ac, - "llvm.SI.vs.load.input", ctx->v4f32, args, 3, - AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND | - AC_FUNC_ATTR_LEGACY); + + input = ac_build_buffer_load_format(&ctx->ac, t_list, + buffer_index, + LLVMConstInt(ctx->i32, 0, false), + true); for (unsigned chan = 0; chan < 4; chan++) { LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false); ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] = - to_integer(ctx, LLVMBuildExtractElement(ctx->builder, + to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->builder, input, llvm_chan, "")); } } } -static void -handle_gs_input_decl(struct nir_to_llvm_context *ctx, - struct nir_variable *variable) -{ - int idx = variable->data.location; - - if (idx == VARYING_SLOT_CLIP_DIST0 || - idx == VARYING_SLOT_CULL_DIST0) { - int length = glsl_get_length(glsl_get_array_element(variable->type)); - if (idx == VARYING_SLOT_CLIP_DIST0) - ctx->num_input_clips = length; - else - ctx->num_input_culls = length; - } -} - static void interp_fs_input(struct nir_to_llvm_context *ctx, unsigned attr, LLVMValueRef interp_param, @@ -4129,9 +4967,6 @@ handle_shader_input_decl(struct nir_to_llvm_context *ctx, case MESA_SHADER_FRAGMENT: handle_fs_input_decl(ctx, variable); break; - case 
MESA_SHADER_GEOMETRY: - handle_gs_input_decl(ctx, variable); - break; default: break; } @@ -4177,16 +5012,16 @@ handle_fs_inputs_pre(struct nir_to_llvm_context *ctx, } static LLVMValueRef -ac_build_alloca(struct nir_to_llvm_context *ctx, +ac_build_alloca(struct ac_llvm_context *ac, LLVMTypeRef type, const char *name) { - LLVMBuilderRef builder = ctx->builder; + LLVMBuilderRef builder = ac->builder; LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder); LLVMValueRef function = LLVMGetBasicBlockParent(current_block); LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function); LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block); - LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ctx->context); + LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ac->context); LLVMValueRef res; if (first_instr) { @@ -4203,38 +5038,75 @@ ac_build_alloca(struct nir_to_llvm_context *ctx, return res; } -static LLVMValueRef si_build_alloca_undef(struct nir_to_llvm_context *ctx, +static LLVMValueRef si_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type, const char *name) { - LLVMValueRef ptr = ac_build_alloca(ctx, type, name); - LLVMBuildStore(ctx->builder, LLVMGetUndef(type), ptr); + LLVMValueRef ptr = ac_build_alloca(ac, type, name); + LLVMBuildStore(ac->builder, LLVMGetUndef(type), ptr); return ptr; } static void -handle_shader_output_decl(struct nir_to_llvm_context *ctx, - struct nir_variable *variable) +scan_shader_output_decl(struct nir_to_llvm_context *ctx, + struct nir_variable *variable) { int idx = variable->data.location + variable->data.index; unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); + uint64_t mask_attribs; variable->data.driver_location = idx * 4; + /* tess ctrl has it's own load/store paths for outputs */ + if (ctx->stage == MESA_SHADER_TESS_CTRL) + return; + + mask_attribs = ((1ull << attrib_count) - 1) << idx; if (ctx->stage == MESA_SHADER_VERTEX || + ctx->stage == MESA_SHADER_TESS_EVAL 
|| ctx->stage == MESA_SHADER_GEOMETRY) { - if (idx == VARYING_SLOT_CLIP_DIST0 || - idx == VARYING_SLOT_CULL_DIST0) { - int length = glsl_get_length(variable->type); - if (idx == VARYING_SLOT_CLIP_DIST0) { - if (ctx->stage == MESA_SHADER_VERTEX) - ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << length) - 1; - ctx->num_output_clips = length; - } else if (idx == VARYING_SLOT_CULL_DIST0) { - if (ctx->stage == MESA_SHADER_VERTEX) - ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << length) - 1; - ctx->num_output_culls = length; + if (idx == VARYING_SLOT_CLIP_DIST0) { + int length = ctx->num_output_clips + ctx->num_output_culls; + if (ctx->stage == MESA_SHADER_VERTEX) { + ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << ctx->num_output_clips) - 1; + ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << ctx->num_output_culls) - 1; + } + if (ctx->stage == MESA_SHADER_TESS_EVAL) { + ctx->shader_info->tes.outinfo.clip_dist_mask = (1 << ctx->num_output_clips) - 1; + ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << ctx->num_output_culls) - 1; } + + if (length > 4) + attrib_count = 2; + else + attrib_count = 1; + mask_attribs = 1ull << idx; + } + } + + ctx->output_mask |= mask_attribs; +} + +static void +handle_shader_output_decl(struct ac_nir_context *ctx, + struct nir_shader *nir, + struct nir_variable *variable) +{ + unsigned output_loc = variable->data.driver_location / 4; + unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); + + /* tess ctrl has it's own load/store paths for outputs */ + if (ctx->stage == MESA_SHADER_TESS_CTRL) + return; + + if (ctx->stage == MESA_SHADER_VERTEX || + ctx->stage == MESA_SHADER_TESS_EVAL || + ctx->stage == MESA_SHADER_GEOMETRY) { + int idx = variable->data.location + variable->data.index; + if (idx == VARYING_SLOT_CLIP_DIST0) { + int length = nir->info.clip_distance_array_size + + nir->info.cull_distance_array_size; + if (length > 4) attrib_count = 2; else @@ -4244,15 +5116,76 @@ 
handle_shader_output_decl(struct nir_to_llvm_context *ctx, for (unsigned i = 0; i < attrib_count; ++i) { for (unsigned chan = 0; chan < 4; chan++) { - ctx->outputs[radeon_llvm_reg_index_soa(idx + i, chan)] = - si_build_alloca_undef(ctx, ctx->f32, ""); + ctx->outputs[radeon_llvm_reg_index_soa(output_loc + i, chan)] = + si_build_alloca_undef(&ctx->ac, ctx->ac.f32, ""); } } - ctx->output_mask |= ((1ull << attrib_count) - 1) << idx; +} + +static LLVMTypeRef +glsl_base_to_llvm_type(struct nir_to_llvm_context *ctx, + enum glsl_base_type type) +{ + switch (type) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_SUBROUTINE: + return ctx->i32; + case GLSL_TYPE_FLOAT: /* TODO handle mediump */ + return ctx->f32; + case GLSL_TYPE_INT64: + case GLSL_TYPE_UINT64: + return ctx->i64; + case GLSL_TYPE_DOUBLE: + return ctx->f64; + default: + unreachable("unknown GLSL type"); + } +} + +static LLVMTypeRef +glsl_to_llvm_type(struct nir_to_llvm_context *ctx, + const struct glsl_type *type) +{ + if (glsl_type_is_scalar(type)) { + return glsl_base_to_llvm_type(ctx, glsl_get_base_type(type)); + } + + if (glsl_type_is_vector(type)) { + return LLVMVectorType( + glsl_base_to_llvm_type(ctx, glsl_get_base_type(type)), + glsl_get_vector_elements(type)); + } + + if (glsl_type_is_matrix(type)) { + return LLVMArrayType( + glsl_to_llvm_type(ctx, glsl_get_column_type(type)), + glsl_get_matrix_columns(type)); + } + + if (glsl_type_is_array(type)) { + return LLVMArrayType( + glsl_to_llvm_type(ctx, glsl_get_array_element(type)), + glsl_get_length(type)); + } + + assert(glsl_type_is_struct(type)); + + LLVMTypeRef member_types[glsl_get_length(type)]; + + for (unsigned i = 0; i < glsl_get_length(type); i++) { + member_types[i] = + glsl_to_llvm_type(ctx, + glsl_get_struct_field(type, i)); + } + + return LLVMStructTypeInContext(ctx->context, member_types, + glsl_get_length(type), false); } static void -setup_locals(struct nir_to_llvm_context *ctx, +setup_locals(struct 
ac_nir_context *ctx, struct nir_function *func) { int i, j; @@ -4269,13 +5202,27 @@ setup_locals(struct nir_to_llvm_context *ctx, for (i = 0; i < ctx->num_locals; i++) { for (j = 0; j < 4; j++) { ctx->locals[i * 4 + j] = - si_build_alloca_undef(ctx, ctx->f32, "temp"); + si_build_alloca_undef(&ctx->ac, ctx->ac.f32, "temp"); } } } +static void +setup_shared(struct ac_nir_context *ctx, + struct nir_shader *nir) +{ + nir_foreach_variable(variable, &nir->shared) { + LLVMValueRef shared = + LLVMAddGlobalInAddressSpace( + ctx->ac.module, glsl_to_llvm_type(ctx->nctx, variable->type), + variable->name ? variable->name : "", + LOCAL_ADDR_SPACE); + _mesa_hash_table_insert(ctx->vars, variable, shared); + } +} + static LLVMValueRef -emit_float_saturate(struct nir_to_llvm_context *ctx, LLVMValueRef v, float lo, float hi) +emit_float_saturate(struct ac_llvm_context *ctx, LLVMValueRef v, float lo, float hi) { v = to_float(ctx, v); v = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", ctx->f32, v, LLVMConstReal(ctx->f32, lo)); @@ -4385,7 +5332,7 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx, case V_028714_SPI_SHADER_SNORM16_ABGR: for (unsigned chan = 0; chan < 4; chan++) { - val[chan] = emit_float_saturate(ctx, values[chan], -1, 1); + val[chan] = emit_float_saturate(&ctx->ac, values[chan], -1, 1); val[chan] = LLVMBuildFMul(ctx->builder, val[chan], LLVMConstReal(ctx->f32, 32767), ""); @@ -4408,8 +5355,8 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx, LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 255 : 65535, 0); for (unsigned chan = 0; chan < 4; chan++) { - val[chan] = to_integer(ctx, values[chan]); - val[chan] = emit_minmax_int(ctx, LLVMIntULT, val[chan], max); + val[chan] = to_integer(&ctx->ac, values[chan]); + val[chan] = emit_minmax_int(&ctx->ac, LLVMIntULT, val[chan], max); } args->compr = 1; @@ -4424,9 +5371,9 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx, /* Clamp. 
*/ for (unsigned chan = 0; chan < 4; chan++) { - val[chan] = to_integer(ctx, values[chan]); - val[chan] = emit_minmax_int(ctx, LLVMIntSLT, val[chan], max); - val[chan] = emit_minmax_int(ctx, LLVMIntSGT, val[chan], min); + val[chan] = to_integer(&ctx->ac, values[chan]); + val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSLT, val[chan], max); + val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSGT, val[chan], min); } args->compr = 1; @@ -4444,11 +5391,12 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx, memcpy(&args->out[0], values, sizeof(values[0]) * 4); for (unsigned i = 0; i < 4; ++i) - args->out[i] = to_float(ctx, args->out[i]); + args->out[i] = to_float(&ctx->ac, args->out[i]); } static void handle_vs_outputs_post(struct nir_to_llvm_context *ctx, + bool export_prim_id, struct ac_vs_output_info *outinfo) { uint32_t param_count = 0; @@ -4457,14 +5405,11 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx, struct ac_export_args args, pos_args[4] = {}; LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_index_value = NULL; int i; - const uint64_t clip_mask = ctx->output_mask & ((1ull << VARYING_SLOT_CLIP_DIST0) | - (1ull << VARYING_SLOT_CLIP_DIST1) | - (1ull << VARYING_SLOT_CULL_DIST0) | - (1ull << VARYING_SLOT_CULL_DIST1)); - - outinfo->prim_id_output = 0xffffffff; - outinfo->layer_output = 0xffffffff; - if (clip_mask) { + + memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED, + sizeof(outinfo->vs_output_param_offset)); + + if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) { LLVMValueRef slots[8]; unsigned j; @@ -4472,13 +5417,9 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx, outinfo->cull_dist_mask <<= ctx->num_output_clips; i = VARYING_SLOT_CLIP_DIST0; - for (j = 0; j < ctx->num_output_clips; j++) - slots[j] = to_float(ctx, LLVMBuildLoad(ctx->builder, - ctx->outputs[radeon_llvm_reg_index_soa(i, j)], "")); - i = VARYING_SLOT_CULL_DIST0; - for (j = 0; j < ctx->num_output_culls; j++) - slots[ctx->num_output_clips + j] 
= to_float(ctx, LLVMBuildLoad(ctx->builder, - ctx->outputs[radeon_llvm_reg_index_soa(i, j)], "")); + for (j = 0; j < ctx->num_output_clips + ctx->num_output_culls; j++) + slots[j] = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, + ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], "")); for (i = ctx->num_output_clips + ctx->num_output_culls; i < 8; i++) slots[i] = LLVMGetUndef(ctx->f32); @@ -4497,68 +5438,30 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx, } - for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { - LLVMValueRef values[4]; - if (!(ctx->output_mask & (1ull << i))) - continue; - + LLVMValueRef pos_values[4] = {ctx->f32zero, ctx->f32zero, ctx->f32zero, ctx->f32one}; + if (ctx->output_mask & (1ull << VARYING_SLOT_POS)) { for (unsigned j = 0; j < 4; j++) - values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder, - ctx->outputs[radeon_llvm_reg_index_soa(i, j)], "")); - - if (i == VARYING_SLOT_POS) { - target = V_008DFC_SQ_EXP_POS; - } else if (i == VARYING_SLOT_CLIP_DIST0 || - i == VARYING_SLOT_CLIP_DIST1 || - i == VARYING_SLOT_CULL_DIST0 || - i == VARYING_SLOT_CULL_DIST1) { - continue; - } else if (i == VARYING_SLOT_PSIZ) { - outinfo->writes_pointsize = true; - psize_value = values[0]; - continue; - } else if (i == VARYING_SLOT_LAYER) { - outinfo->writes_layer = true; - layer_value = values[0]; - outinfo->layer_output = param_count; - target = V_008DFC_SQ_EXP_PARAM + param_count; - param_count++; - } else if (i == VARYING_SLOT_VIEWPORT) { - outinfo->writes_viewport_index = true; - viewport_index_value = values[0]; - continue; - } else if (i == VARYING_SLOT_PRIMITIVE_ID) { - outinfo->prim_id_output = param_count; - target = V_008DFC_SQ_EXP_PARAM + param_count; - param_count++; - } else if (i >= VARYING_SLOT_VAR0) { - outinfo->export_mask |= 1u << (i - VARYING_SLOT_VAR0); - target = V_008DFC_SQ_EXP_PARAM + param_count; - param_count++; - } + pos_values[j] = LLVMBuildLoad(ctx->builder, + 
ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_POS, j)], ""); + } + si_llvm_init_export_args(ctx, pos_values, V_008DFC_SQ_EXP_POS, &pos_args[0]); - si_llvm_init_export_args(ctx, values, target, &args); + if (ctx->output_mask & (1ull << VARYING_SLOT_PSIZ)) { + outinfo->writes_pointsize = true; + psize_value = LLVMBuildLoad(ctx->builder, + ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_PSIZ, 0)], ""); + } - if (target >= V_008DFC_SQ_EXP_POS && - target <= (V_008DFC_SQ_EXP_POS + 3)) { - memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], - &args, sizeof(args)); - } else { - ac_build_export(&ctx->ac, &args); - } + if (ctx->output_mask & (1ull << VARYING_SLOT_LAYER)) { + outinfo->writes_layer = true; + layer_value = LLVMBuildLoad(ctx->builder, + ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)], ""); } - /* We need to add the position output manually if it's missing. */ - if (!pos_args[0].out[0]) { - pos_args[0].enabled_channels = 0xf; - pos_args[0].valid_mask = 0; - pos_args[0].done = 0; - pos_args[0].target = V_008DFC_SQ_EXP_POS; - pos_args[0].compr = 0; - pos_args[0].out[0] = ctx->f32zero; /* X */ - pos_args[0].out[1] = ctx->f32zero; /* Y */ - pos_args[0].out[2] = ctx->f32zero; /* Z */ - pos_args[0].out[3] = ctx->f32one; /* W */ + if (ctx->output_mask & (1ull << VARYING_SLOT_VIEWPORT)) { + outinfo->writes_viewport_index = true; + viewport_index_value = LLVMBuildLoad(ctx->builder, + ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_VIEWPORT, 0)], ""); } uint32_t mask = ((outinfo->writes_pointsize == true ? 
1 : 0) | @@ -4599,6 +5502,58 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx, ac_build_export(&ctx->ac, &pos_args[i]); } + for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { + LLVMValueRef values[4]; + if (!(ctx->output_mask & (1ull << i))) + continue; + + for (unsigned j = 0; j < 4; j++) + values[j] = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, + ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], "")); + + if (i == VARYING_SLOT_LAYER) { + target = V_008DFC_SQ_EXP_PARAM + param_count; + outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = param_count; + param_count++; + } else if (i == VARYING_SLOT_PRIMITIVE_ID) { + target = V_008DFC_SQ_EXP_PARAM + param_count; + outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count; + param_count++; + } else if (i >= VARYING_SLOT_VAR0) { + outinfo->export_mask |= 1u << (i - VARYING_SLOT_VAR0); + target = V_008DFC_SQ_EXP_PARAM + param_count; + outinfo->vs_output_param_offset[i] = param_count; + param_count++; + } else + continue; + + si_llvm_init_export_args(ctx, values, target, &args); + + if (target >= V_008DFC_SQ_EXP_POS && + target <= (V_008DFC_SQ_EXP_POS + 3)) { + memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], + &args, sizeof(args)); + } else { + ac_build_export(&ctx->ac, &args); + } + } + + if (export_prim_id) { + LLVMValueRef values[4]; + target = V_008DFC_SQ_EXP_PARAM + param_count; + outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count; + param_count++; + + values[0] = ctx->vs_prim_id; + ctx->shader_info->vs.vgpr_comp_cnt = MAX2(2, + ctx->shader_info->vs.vgpr_comp_cnt); + for (unsigned j = 1; j < 4; j++) + values[j] = ctx->f32zero; + si_llvm_init_export_args(ctx, values, target, &args); + ac_build_export(&ctx->ac, &args); + outinfo->export_prim_id = true; + } + outinfo->pos_exports = num_pos_exports; outinfo->param_exports = param_count; } @@ -4610,23 +5565,19 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx, int j; uint64_t max_output_written = 0; for 
(unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { - LLVMValueRef *out_ptr = &ctx->outputs[i * 4]; + LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4]; int param_index; int length = 4; - int start = 0; + if (!(ctx->output_mask & (1ull << i))) continue; - if (i == VARYING_SLOT_CLIP_DIST0) { - length = ctx->num_output_clips; - } else if (i == VARYING_SLOT_CULL_DIST0) { - start = ctx->num_output_clips; - length = ctx->num_output_culls; - } + if (i == VARYING_SLOT_CLIP_DIST0) + length = ctx->num_output_clips + ctx->num_output_culls; + param_index = shader_io_get_unique_index(i); - if (param_index > max_output_written) - max_output_written = param_index; + max_output_written = MAX2(param_index + (length > 4), max_output_written); for (j = 0; j < length; j++) { LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, out_ptr[j], ""); @@ -4636,7 +5587,7 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx, ctx->esgs_ring, out_val, 1, NULL, ctx->es2gs_offset, - (4 * param_index + j + start) * 4, + (4 * param_index + j) * 4, 1, 1, true, true); } } @@ -4644,23 +5595,288 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx, } static void -si_export_mrt_color(struct nir_to_llvm_context *ctx, - LLVMValueRef *color, unsigned param, bool is_last) +handle_ls_outputs_post(struct nir_to_llvm_context *ctx) { + LLVMValueRef vertex_id = ctx->rel_auto_id; + LLVMValueRef vertex_dw_stride = unpack_param(ctx, ctx->ls_out_layout, 13, 8); + LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->builder, vertex_id, + vertex_dw_stride, ""); - struct ac_export_args args; + for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { + LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4]; + int length = 4; + + if (!(ctx->output_mask & (1ull << i))) + continue; + + if (i == VARYING_SLOT_CLIP_DIST0) + length = ctx->num_output_clips + ctx->num_output_culls; + int param = shader_io_get_unique_index(i); + mark_tess_output(ctx, false, param); + if (length > 4) + mark_tess_output(ctx, false, param + 1); + 
LLVMValueRef dw_addr = LLVMBuildAdd(ctx->builder, base_dw_addr, + LLVMConstInt(ctx->i32, param * 4, false), + ""); + for (unsigned j = 0; j < length; j++) { + lds_store(ctx, dw_addr, + LLVMBuildLoad(ctx->builder, out_ptr[j], "")); + dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->i32one, ""); + } + } +} + +struct ac_build_if_state +{ + struct nir_to_llvm_context *ctx; + LLVMValueRef condition; + LLVMBasicBlockRef entry_block; + LLVMBasicBlockRef true_block; + LLVMBasicBlockRef false_block; + LLVMBasicBlockRef merge_block; +}; + +static LLVMBasicBlockRef +ac_build_insert_new_block(struct nir_to_llvm_context *ctx, const char *name) +{ + LLVMBasicBlockRef current_block; + LLVMBasicBlockRef next_block; + LLVMBasicBlockRef new_block; + + /* get current basic block */ + current_block = LLVMGetInsertBlock(ctx->builder); + + /* chqeck if there's another block after this one */ + next_block = LLVMGetNextBasicBlock(current_block); + if (next_block) { + /* insert the new block before the next block */ + new_block = LLVMInsertBasicBlockInContext(ctx->context, next_block, name); + } + else { + /* append new block after current block */ + LLVMValueRef function = LLVMGetBasicBlockParent(current_block); + new_block = LLVMAppendBasicBlockInContext(ctx->context, function, name); + } + return new_block; +} + +static void +ac_nir_build_if(struct ac_build_if_state *ifthen, + struct nir_to_llvm_context *ctx, + LLVMValueRef condition) +{ + LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->builder); + + memset(ifthen, 0, sizeof *ifthen); + ifthen->ctx = ctx; + ifthen->condition = condition; + ifthen->entry_block = block; + + /* create endif/merge basic block for the phi functions */ + ifthen->merge_block = ac_build_insert_new_block(ctx, "endif-block"); + + /* create/insert true_block before merge_block */ + ifthen->true_block = + LLVMInsertBasicBlockInContext(ctx->context, + ifthen->merge_block, + "if-true-block"); + + /* successive code goes into the true block */ + 
LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block); +} + +/** + * End a conditional. + */ +static void +ac_nir_build_endif(struct ac_build_if_state *ifthen) +{ + LLVMBuilderRef builder = ifthen->ctx->builder; + + /* Insert branch to the merge block from current block */ + LLVMBuildBr(builder, ifthen->merge_block); + + /* + * Now patch in the various branch instructions. + */ + + /* Insert the conditional branch instruction at the end of entry_block */ + LLVMPositionBuilderAtEnd(builder, ifthen->entry_block); + if (ifthen->false_block) { + /* we have an else clause */ + LLVMBuildCondBr(builder, ifthen->condition, + ifthen->true_block, ifthen->false_block); + } + else { + /* no else clause */ + LLVMBuildCondBr(builder, ifthen->condition, + ifthen->true_block, ifthen->merge_block); + } + + /* Resume building code at end of the ifthen->merge_block */ + LLVMPositionBuilderAtEnd(builder, ifthen->merge_block); +} + +static void +write_tess_factors(struct nir_to_llvm_context *ctx) +{ + unsigned stride, outer_comps, inner_comps; + struct ac_build_if_state if_ctx, inner_if_ctx; + LLVMValueRef invocation_id = unpack_param(ctx, ctx->tcs_rel_ids, 8, 5); + LLVMValueRef rel_patch_id = unpack_param(ctx, ctx->tcs_rel_ids, 0, 8); + unsigned tess_inner_index, tess_outer_index; + LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer; + LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4]; + int i; + emit_barrier(ctx); + + switch (ctx->options->key.tcs.primitive_mode) { + case GL_ISOLINES: + stride = 2; + outer_comps = 2; + inner_comps = 0; + break; + case GL_TRIANGLES: + stride = 4; + outer_comps = 3; + inner_comps = 1; + break; + case GL_QUADS: + stride = 6; + outer_comps = 4; + inner_comps = 2; + break; + default: + return; + } + + ac_nir_build_if(&if_ctx, ctx, + LLVMBuildICmp(ctx->builder, LLVMIntEQ, + invocation_id, ctx->i32zero, "")); + + tess_inner_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER); + tess_outer_index = 
shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER); + + mark_tess_output(ctx, true, tess_inner_index); + mark_tess_output(ctx, true, tess_outer_index); + lds_base = get_tcs_out_current_patch_data_offset(ctx); + lds_inner = LLVMBuildAdd(ctx->builder, lds_base, + LLVMConstInt(ctx->i32, tess_inner_index * 4, false), ""); + lds_outer = LLVMBuildAdd(ctx->builder, lds_base, + LLVMConstInt(ctx->i32, tess_outer_index * 4, false), ""); + + for (i = 0; i < 4; i++) { + inner[i] = LLVMGetUndef(ctx->i32); + outer[i] = LLVMGetUndef(ctx->i32); + } + // LINES reverseal + if (ctx->options->key.tcs.primitive_mode == GL_ISOLINES) { + outer[0] = out[1] = lds_load(ctx, lds_outer); + lds_outer = LLVMBuildAdd(ctx->builder, lds_outer, + LLVMConstInt(ctx->i32, 1, false), ""); + outer[1] = out[0] = lds_load(ctx, lds_outer); + } else { + for (i = 0; i < outer_comps; i++) { + outer[i] = out[i] = + lds_load(ctx, lds_outer); + lds_outer = LLVMBuildAdd(ctx->builder, lds_outer, + LLVMConstInt(ctx->i32, 1, false), ""); + } + for (i = 0; i < inner_comps; i++) { + inner[i] = out[outer_comps+i] = + lds_load(ctx, lds_inner); + lds_inner = LLVMBuildAdd(ctx->builder, lds_inner, + LLVMConstInt(ctx->i32, 1, false), ""); + } + } + + /* Convert the outputs to vectors for stores. */ + vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4)); + vec1 = NULL; + + if (stride > 4) + vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4); + + + buffer = ctx->hs_ring_tess_factor; + tf_base = ctx->tess_factor_offset; + byteoffset = LLVMBuildMul(ctx->builder, rel_patch_id, + LLVMConstInt(ctx->i32, 4 * stride, false), ""); + + ac_nir_build_if(&inner_if_ctx, ctx, + LLVMBuildICmp(ctx->builder, LLVMIntEQ, + rel_patch_id, ctx->i32zero, "")); + + /* Store the dynamic HS control word. */ + ac_build_buffer_store_dword(&ctx->ac, buffer, + LLVMConstInt(ctx->i32, 0x80000000, false), + 1, ctx->i32zero, tf_base, + 0, 1, 0, true, false); + ac_nir_build_endif(&inner_if_ctx); + + /* Store the tessellation factors. 
*/ + ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, + MIN2(stride, 4), byteoffset, tf_base, + 4, 1, 0, true, false); + if (vec1) + ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, + stride - 4, byteoffset, tf_base, + 20, 1, 0, true, false); + + //TODO store to offchip for TES to read - only if TES reads them + if (1) { + LLVMValueRef inner_vec, outer_vec, tf_outer_offset; + LLVMValueRef tf_inner_offset; + unsigned param_outer, param_inner; + + param_outer = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER); + tf_outer_offset = get_tcs_tes_buffer_address(ctx, NULL, + LLVMConstInt(ctx->i32, param_outer, 0)); + + outer_vec = ac_build_gather_values(&ctx->ac, outer, + util_next_power_of_two(outer_comps)); + + ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, outer_vec, + outer_comps, tf_outer_offset, + ctx->oc_lds, 0, 1, 0, true, false); + if (inner_comps) { + param_inner = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER); + tf_inner_offset = get_tcs_tes_buffer_address(ctx, NULL, + LLVMConstInt(ctx->i32, param_inner, 0)); + + inner_vec = inner_comps == 1 ? 
inner[0] : + ac_build_gather_values(&ctx->ac, inner, inner_comps); + ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, inner_vec, + inner_comps, tf_inner_offset, + ctx->oc_lds, 0, 1, 0, true, false); + } + } + ac_nir_build_endif(&if_ctx); +} + +static void +handle_tcs_outputs_post(struct nir_to_llvm_context *ctx) +{ + write_tess_factors(ctx); +} + +static bool +si_export_mrt_color(struct nir_to_llvm_context *ctx, + LLVMValueRef *color, unsigned param, bool is_last, + struct ac_export_args *args) +{ /* Export */ si_llvm_init_export_args(ctx, color, param, - &args); + args); if (is_last) { - args.valid_mask = 1; /* whether the EXEC mask is valid */ - args.done = 1; /* DONE bit */ - } else if (!args.enabled_channels) - return; /* unnecessary NULL export */ + args->valid_mask = 1; /* whether the EXEC mask is valid */ + args->done = 1; /* DONE bit */ + } else if (!args->enabled_channels) + return false; /* unnecessary NULL export */ - ac_build_export(&ctx->ac, &args); + return true; } static void @@ -4696,10 +5912,11 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx, args.enabled_channels |= 0x4; } - /* SI (except OLAND) has a bug that it only looks + /* SI (except OLAND and HAINAN) has a bug that it only looks * at the X writemask component. 
*/ if (ctx->options->chip_class == SI && - ctx->options->family != CHIP_OLAND) + ctx->options->family != CHIP_OLAND && + ctx->options->family != CHIP_HAINAN) args.enabled_channels |= 0x1; ac_build_export(&ctx->ac, &args); @@ -4710,6 +5927,7 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx) { unsigned index = 0; LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL; + struct ac_export_args color_args[8]; for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { LLVMValueRef values[4]; @@ -4719,34 +5937,39 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx) if (i == FRAG_RESULT_DEPTH) { ctx->shader_info->fs.writes_z = true; - depth = to_float(ctx, LLVMBuildLoad(ctx->builder, - ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); + depth = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, + ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); } else if (i == FRAG_RESULT_STENCIL) { ctx->shader_info->fs.writes_stencil = true; - stencil = to_float(ctx, LLVMBuildLoad(ctx->builder, - ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); + stencil = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, + ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); } else if (i == FRAG_RESULT_SAMPLE_MASK) { ctx->shader_info->fs.writes_sample_mask = true; - samplemask = to_float(ctx, LLVMBuildLoad(ctx->builder, - ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); + samplemask = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, + ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], "")); } else { bool last = false; for (unsigned j = 0; j < 4; j++) - values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder, - ctx->outputs[radeon_llvm_reg_index_soa(i, j)], "")); + values[j] = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder, + ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], "")); if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil && !ctx->shader_info->fs.writes_sample_mask) last = ctx->output_mask <= ((1ull << (i + 1)) - 1); - 
si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + index, last); - index++; + bool ret = si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + (i - FRAG_RESULT_DATA0), last, &color_args[index]); + if (ret) + index++; } } + for (unsigned i = 0; i < index; i++) + ac_build_export(&ctx->ac, &color_args[i]); if (depth || stencil || samplemask) si_export_mrt_z(ctx, depth, stencil, samplemask); - else if (!index) - si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true); + else if (!index) { + si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true, &color_args[0]); + ac_build_export(&ctx->ac, &color_args[0]); + } ctx->shader_info->fs.output_mask = index ? ((1ull << index) - 1) : 0; } @@ -4758,14 +5981,20 @@ emit_gs_epilogue(struct nir_to_llvm_context *ctx) } static void -handle_shader_outputs_post(struct nir_to_llvm_context *ctx) +handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs, + LLVMValueRef *addrs) { + struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi); + switch (ctx->stage) { case MESA_SHADER_VERTEX: - if (ctx->options->key.vs.as_es) + if (ctx->options->key.vs.as_ls) + handle_ls_outputs_post(ctx); + else if (ctx->options->key.vs.as_es) handle_es_outputs_post(ctx, &ctx->shader_info->vs.es_info); else - handle_vs_outputs_post(ctx, &ctx->shader_info->vs.outinfo); + handle_vs_outputs_post(ctx, ctx->options->key.vs.export_prim_id, + &ctx->shader_info->vs.outinfo); break; case MESA_SHADER_FRAGMENT: handle_fs_outputs_post(ctx); @@ -4773,20 +6002,21 @@ handle_shader_outputs_post(struct nir_to_llvm_context *ctx) case MESA_SHADER_GEOMETRY: emit_gs_epilogue(ctx); break; + case MESA_SHADER_TESS_CTRL: + handle_tcs_outputs_post(ctx); + break; + case MESA_SHADER_TESS_EVAL: + if (ctx->options->key.tes.as_es) + handle_es_outputs_post(ctx, &ctx->shader_info->tes.es_info); + else + handle_vs_outputs_post(ctx, ctx->options->key.tes.export_prim_id, + &ctx->shader_info->tes.outinfo); + break; default: break; } } -static void 
-handle_shared_compute_var(struct nir_to_llvm_context *ctx, - struct nir_variable *variable, uint32_t *offset, int idx) -{ - unsigned size = glsl_count_attribute_slots(variable->type, false); - variable->data.driver_location = *offset; - *offset += size; -} - static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx) { LLVMPassManagerRef passmgr; @@ -4813,20 +6043,54 @@ static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx) LLVMDisposePassManager(passmgr); } +static void +ac_nir_eliminate_const_vs_outputs(struct nir_to_llvm_context *ctx) +{ + struct ac_vs_output_info *outinfo; + + switch (ctx->stage) { + case MESA_SHADER_FRAGMENT: + case MESA_SHADER_COMPUTE: + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_GEOMETRY: + return; + case MESA_SHADER_VERTEX: + if (ctx->options->key.vs.as_ls || + ctx->options->key.vs.as_es) + return; + outinfo = &ctx->shader_info->vs.outinfo; + break; + case MESA_SHADER_TESS_EVAL: + if (ctx->options->key.vs.as_es) + return; + outinfo = &ctx->shader_info->tes.outinfo; + break; + default: + unreachable("Unhandled shader type"); + } + + ac_optimize_vs_outputs(&ctx->ac, + ctx->main_function, + outinfo->vs_output_param_offset, + VARYING_SLOT_MAX, + &outinfo->param_exports); +} + static void ac_setup_rings(struct nir_to_llvm_context *ctx) { - if (ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) { - ctx->esgs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, ctx->i32one); + if ((ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) || + (ctx->stage == MESA_SHADER_TESS_EVAL && ctx->options->key.tes.as_es)) { + ctx->esgs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_VS, false)); } if (ctx->is_gs_copy_shader) { - ctx->gsvs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, 3, false)); + ctx->gsvs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_VS, 
false)); } if (ctx->stage == MESA_SHADER_GEOMETRY) { LLVMValueRef tmp; - ctx->esgs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, 2, false)); - ctx->gsvs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, 4, false)); + ctx->esgs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_GS, false)); + ctx->gsvs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_GS, false)); ctx->gsvs_ring = LLVMBuildBitCast(ctx->builder, ctx->gsvs_ring, ctx->v4i32, ""); @@ -4834,11 +6098,85 @@ ac_setup_rings(struct nir_to_llvm_context *ctx) tmp = LLVMBuildExtractElement(ctx->builder, ctx->gsvs_ring, ctx->i32one, ""); tmp = LLVMBuildOr(ctx->builder, tmp, ctx->gsvs_ring_stride, ""); ctx->gsvs_ring = LLVMBuildInsertElement(ctx->builder, ctx->gsvs_ring, tmp, ctx->i32one, ""); + } - ctx->gsvs_ring = LLVMBuildBitCast(ctx->builder, ctx->gsvs_ring, ctx->v16i8, ""); + if (ctx->stage == MESA_SHADER_TESS_CTRL || + ctx->stage == MESA_SHADER_TESS_EVAL) { + ctx->hs_ring_tess_offchip = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_OFFCHIP, false)); + ctx->hs_ring_tess_factor = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_FACTOR, false)); } } +static unsigned +ac_nir_get_max_workgroup_size(enum chip_class chip_class, + const struct nir_shader *nir) +{ + switch (nir->stage) { + case MESA_SHADER_TESS_CTRL: + return chip_class >= CIK ? 
128 : 64; + case MESA_SHADER_GEOMETRY: + return 64; + case MESA_SHADER_COMPUTE: + break; + default: + return 0; + } + + unsigned max_workgroup_size = nir->info.cs.local_size[0] * + nir->info.cs.local_size[1] * + nir->info.cs.local_size[2]; + return max_workgroup_size; +} + +void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, + struct nir_shader *nir, struct nir_to_llvm_context *nctx) +{ + struct ac_nir_context ctx = {}; + struct nir_function *func; + + ctx.ac = *ac; + ctx.abi = abi; + + ctx.nctx = nctx; + if (nctx) + nctx->nir = &ctx; + + ctx.stage = nir->stage; + + ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder)); + + nir_foreach_variable(variable, &nir->outputs) + handle_shader_output_decl(&ctx, nir, variable); + + ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + func = (struct nir_function *)exec_list_get_head(&nir->functions); + + setup_locals(&ctx, func); + + if (nir->stage == MESA_SHADER_COMPUTE) + setup_shared(&ctx, nir); + + visit_cf_list(&ctx, &func->impl->body); + phi_post_pass(&ctx); + + ctx.abi->emit_outputs(ctx.abi, RADEON_LLVM_MAX_OUTPUTS, + ctx.outputs); + + free(ctx.locals); + ralloc_free(ctx.defs); + ralloc_free(ctx.phis); + ralloc_free(ctx.vars); + + if (nctx) + nctx->nir = NULL; +} + static LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, struct nir_shader *nir, @@ -4846,7 +6184,6 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, const struct ac_nir_compiler_options *options) { struct nir_to_llvm_context ctx = {0}; - struct nir_function *func; unsigned i; ctx.options = options; ctx.shader_info = shader_info; @@ -4860,6 +6197,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, memset(shader_info, 0, sizeof(*shader_info)); 
+ ac_nir_shader_info_pass(nir, options, &shader_info->info); + LLVMSetTarget(ctx.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--"); LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm); @@ -4873,6 +6212,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, ctx.builder = LLVMCreateBuilderInContext(ctx.context); ctx.ac.builder = ctx.builder; ctx.stage = nir->stage; + ctx.max_workgroup_size = ac_nir_get_max_workgroup_size(ctx.options->chip_class, nir); for (i = 0; i < AC_UD_MAX_SETS; i++) shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1; @@ -4881,70 +6221,60 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, create_function(&ctx); - if (nir->stage == MESA_SHADER_COMPUTE) { - int num_shared = 0; - nir_foreach_variable(variable, &nir->shared) - num_shared++; - if (num_shared) { - int idx = 0; - uint32_t shared_size = 0; - LLVMValueRef var; - LLVMTypeRef i8p = LLVMPointerType(ctx.i8, LOCAL_ADDR_SPACE); - nir_foreach_variable(variable, &nir->shared) { - handle_shared_compute_var(&ctx, variable, &shared_size, idx); - idx++; - } - - shared_size *= 16; - var = LLVMAddGlobalInAddressSpace(ctx.module, - LLVMArrayType(ctx.i8, shared_size), - "compute_lds", - LOCAL_ADDR_SPACE); - LLVMSetAlignment(var, 4); - ctx.shared_memory = LLVMBuildBitCast(ctx.builder, var, i8p, ""); + if (nir->stage == MESA_SHADER_GEOMETRY) { + ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, ctx.i32, "gs_next_vertex"); + + ctx.gs_max_out_vertices = nir->info.gs.vertices_out; + } else if (nir->stage == MESA_SHADER_TESS_EVAL) { + ctx.tes_primitive_mode = nir->info.tess.primitive_mode; + } else if (nir->stage == MESA_SHADER_VERTEX) { + if (shader_info->info.vs.needs_instance_id) { + ctx.shader_info->vs.vgpr_comp_cnt = + MAX2(3, ctx.shader_info->vs.vgpr_comp_cnt); } - } else if (nir->stage == MESA_SHADER_GEOMETRY) { - ctx.gs_next_vertex = ac_build_alloca(&ctx, ctx.i32, "gs_next_vertex"); - - ctx.gs_max_out_vertices = 
nir->info->gs.vertices_out; + } else if (nir->stage == MESA_SHADER_FRAGMENT) { + shader_info->fs.can_discard = nir->info.fs.uses_discard; } ac_setup_rings(&ctx); + ctx.num_output_clips = nir->info.clip_distance_array_size; + ctx.num_output_culls = nir->info.cull_distance_array_size; + nir_foreach_variable(variable, &nir->inputs) handle_shader_input_decl(&ctx, variable); if (nir->stage == MESA_SHADER_FRAGMENT) handle_fs_inputs_pre(&ctx, nir); - nir_foreach_variable(variable, &nir->outputs) - handle_shader_output_decl(&ctx, variable); + ctx.abi.chip_class = options->chip_class; + ctx.abi.inputs = &ctx.inputs[0]; + ctx.abi.emit_outputs = handle_shader_outputs_post; + ctx.abi.load_sampler_desc = radv_get_sampler_desc; - ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - - func = (struct nir_function *)exec_list_get_head(&nir->functions); - - setup_locals(&ctx, func); + nir_foreach_variable(variable, &nir->outputs) + scan_shader_output_decl(&ctx, variable); - visit_cf_list(&ctx, &func->impl->body); - phi_post_pass(&ctx); + ac_nir_translate(&ctx.ac, &ctx.abi, nir, &ctx); - handle_shader_outputs_post(&ctx); LLVMBuildRetVoid(ctx.builder); ac_llvm_finalize_module(&ctx); - free(ctx.locals); - ralloc_free(ctx.defs); - ralloc_free(ctx.phis); + + ac_nir_eliminate_const_vs_outputs(&ctx); if (nir->stage == MESA_SHADER_GEOMETRY) { - shader_info->gs.gsvs_vertex_size = util_bitcount64(ctx.output_mask) * 16; + unsigned addclip = ctx.num_output_clips + ctx.num_output_culls > 4; + shader_info->gs.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16; shader_info->gs.max_gsvs_emit_size = shader_info->gs.gsvs_vertex_size * - nir->info->gs.vertices_out; + nir->info.gs.vertices_out; + } else if (nir->stage == MESA_SHADER_TESS_CTRL) { + shader_info->tcs.outputs_written = ctx.tess_outputs_written; + shader_info->tcs.patch_outputs_written = 
ctx.tess_patch_outputs_written; + } else if (nir->stage == MESA_SHADER_VERTEX && ctx.options->key.vs.as_ls) { + shader_info->vs.outputs_written = ctx.tess_outputs_written; } + return ctx.module; } @@ -5090,19 +6420,33 @@ void ac_compile_nir_shader(LLVMTargetMachineRef tm, switch (nir->stage) { case MESA_SHADER_COMPUTE: for (int i = 0; i < 3; ++i) - shader_info->cs.block_size[i] = nir->info->cs.local_size[i]; + shader_info->cs.block_size[i] = nir->info.cs.local_size[i]; break; case MESA_SHADER_FRAGMENT: - shader_info->fs.early_fragment_test = nir->info->fs.early_fragment_tests; + shader_info->fs.early_fragment_test = nir->info.fs.early_fragment_tests; break; case MESA_SHADER_GEOMETRY: - shader_info->gs.vertices_in = nir->info->gs.vertices_in; - shader_info->gs.vertices_out = nir->info->gs.vertices_out; - shader_info->gs.output_prim = nir->info->gs.output_primitive; - shader_info->gs.invocations = nir->info->gs.invocations; + shader_info->gs.vertices_in = nir->info.gs.vertices_in; + shader_info->gs.vertices_out = nir->info.gs.vertices_out; + shader_info->gs.output_prim = nir->info.gs.output_primitive; + shader_info->gs.invocations = nir->info.gs.invocations; + break; + case MESA_SHADER_TESS_EVAL: + shader_info->tes.primitive_mode = nir->info.tess.primitive_mode; + shader_info->tes.spacing = nir->info.tess.spacing; + shader_info->tes.ccw = nir->info.tess.ccw; + shader_info->tes.point_mode = nir->info.tess.point_mode; + shader_info->tes.as_es = options->key.tes.as_es; + break; + case MESA_SHADER_TESS_CTRL: + shader_info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out; break; case MESA_SHADER_VERTEX: shader_info->vs.as_es = options->key.vs.as_es; + shader_info->vs.as_ls = options->key.vs.as_ls; + /* in LS mode we need at least 1, invocation id needs 3, handled elsewhere */ + if (options->key.vs.as_ls) + shader_info->vs.vgpr_comp_cnt = MAX2(1, shader_info->vs.vgpr_comp_cnt); break; default: break; @@ -5114,7 +6458,7 @@ ac_gs_copy_shader_emit(struct 
nir_to_llvm_context *ctx) { LLVMValueRef args[9]; args[0] = ctx->gsvs_ring; - args[1] = LLVMBuildMul(ctx->builder, ctx->vertex_id, LLVMConstInt(ctx->i32, 4, false), ""); + args[1] = LLVMBuildMul(ctx->builder, ctx->abi.vertex_id, LLVMConstInt(ctx->i32, 4, false), ""); args[3] = ctx->i32zero; args[4] = ctx->i32one; /* OFFEN */ args[5] = ctx->i32zero; /* IDXEN */ @@ -5123,42 +6467,25 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx) args[8] = ctx->i32zero; /* TFE */ int idx = 0; - int clip_cull_slot = -1; + for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) { int length = 4; - int start = 0; int slot = idx; int slot_inc = 1; if (!(ctx->output_mask & (1ull << i))) continue; - if (i == VARYING_SLOT_CLIP_DIST1 || - i == VARYING_SLOT_CULL_DIST1) - continue; - - if (i == VARYING_SLOT_CLIP_DIST0 || - i == VARYING_SLOT_CULL_DIST0) { + if (i == VARYING_SLOT_CLIP_DIST0) { /* unpack clip and cull from a single set of slots */ - if (clip_cull_slot == -1) { - clip_cull_slot = idx; - if (ctx->num_output_clips + ctx->num_output_culls > 4) - slot_inc = 2; - } else { - slot = clip_cull_slot; - slot_inc = 0; - } - if (i == VARYING_SLOT_CLIP_DIST0) - length = ctx->num_output_clips; - if (i == VARYING_SLOT_CULL_DIST0) { - start = ctx->num_output_clips; - length = ctx->num_output_culls; - } + length = ctx->num_output_clips + ctx->num_output_culls; + if (length > 4) + slot_inc = 2; } for (unsigned j = 0; j < length; j++) { LLVMValueRef value; args[2] = LLVMConstInt(ctx->i32, - (slot * 4 + j + start) * + (slot * 4 + j) * ctx->gs_max_out_vertices * 16 * 4, false); value = ac_build_intrinsic(&ctx->ac, @@ -5168,11 +6495,11 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx) AC_FUNC_ATTR_LEGACY); LLVMBuildStore(ctx->builder, - to_float(ctx, value), ctx->outputs[radeon_llvm_reg_index_soa(i, j)]); + to_float(&ctx->ac, value), ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)]); } idx += slot_inc; } - handle_vs_outputs_post(ctx, &ctx->shader_info->vs.outinfo); + 
handle_vs_outputs_post(ctx, false, &ctx->shader_info->vs.outinfo); } void ac_create_gs_copy_shader(LLVMTargetMachineRef tm, @@ -5202,14 +6529,28 @@ void ac_create_gs_copy_shader(LLVMTargetMachineRef tm, create_function(&ctx); - ctx.gs_max_out_vertices = geom_shader->info->gs.vertices_out; + ctx.gs_max_out_vertices = geom_shader->info.gs.vertices_out; ac_setup_rings(&ctx); - nir_foreach_variable(variable, &geom_shader->outputs) - handle_shader_output_decl(&ctx, variable); + ctx.num_output_clips = geom_shader->info.clip_distance_array_size; + ctx.num_output_culls = geom_shader->info.cull_distance_array_size; + + struct ac_nir_context nir_ctx = {}; + nir_ctx.ac = ctx.ac; + nir_ctx.abi = &ctx.abi; + + nir_ctx.nctx = &ctx; + ctx.nir = &nir_ctx; + + nir_foreach_variable(variable, &geom_shader->outputs) { + scan_shader_output_decl(&ctx, variable); + handle_shader_output_decl(&nir_ctx, geom_shader, variable); + } ac_gs_copy_shader_emit(&ctx); + ctx.nir = NULL; + LLVMBuildRetVoid(ctx.builder); ac_llvm_finalize_module(&ctx);