From b2653007b980e6fc9e226687003784c5b3fe5bcb Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Fri, 9 Mar 2018 16:58:10 +0100 Subject: [PATCH] ac/nir: move all RADV related code to radv_nir_to_llvm.c Now the "ac/nir" prefix will really be the shared code between RadeonSI and RADV, that might avoid confusions in the future. Signed-off-by: Samuel Pitoiset Acked-by: Bas Nieuwenhuizen --- src/amd/common/ac_nir_to_llvm.c | 9403 +++++++++-------------------- src/amd/common/ac_nir_to_llvm.h | 17 - src/amd/vulkan/Makefile.sources | 1 + src/amd/vulkan/meson.build | 1 + src/amd/vulkan/radv_nir_to_llvm.c | 3456 +++++++++++ src/amd/vulkan/radv_private.h | 18 + src/amd/vulkan/radv_shader.c | 11 +- 7 files changed, 6482 insertions(+), 6425 deletions(-) create mode 100644 src/amd/vulkan/radv_nir_to_llvm.c diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index ceda8c540d6..3a9cb7b9209 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -35,16 +35,6 @@ #include "ac_shader_util.h" #include "ac_exp_param.h" -enum radeon_llvm_calling_convention { - RADEON_LLVM_AMDGPU_VS = 87, - RADEON_LLVM_AMDGPU_GS = 88, - RADEON_LLVM_AMDGPU_PS = 89, - RADEON_LLVM_AMDGPU_CS = 90, - RADEON_LLVM_AMDGPU_HS = 93, -}; - -#define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1) - struct ac_nir_context { struct ac_llvm_context ac; struct ac_shader_abi *abi; @@ -63,1515 +53,479 @@ struct ac_nir_context { LLVMValueRef *locals; }; -struct radv_shader_context { - struct ac_llvm_context ac; - const struct ac_nir_compiler_options *options; - struct ac_shader_variant_info *shader_info; - struct ac_shader_abi abi; - - unsigned max_workgroup_size; - LLVMContextRef context; - LLVMValueRef main_function; - - LLVMValueRef descriptor_sets[AC_UD_MAX_SETS]; - LLVMValueRef ring_offsets; - - LLVMValueRef vertex_buffers; - LLVMValueRef rel_auto_id; - LLVMValueRef vs_prim_id; - LLVMValueRef ls_out_layout; - LLVMValueRef es2gs_offset; - - LLVMValueRef tcs_offchip_layout; - LLVMValueRef tcs_out_offsets; - LLVMValueRef tcs_out_layout; - LLVMValueRef tcs_in_layout; - LLVMValueRef oc_lds; - LLVMValueRef merged_wave_info; - LLVMValueRef tess_factor_offset; - LLVMValueRef tes_rel_patch_id; - LLVMValueRef tes_u; - LLVMValueRef tes_v; - - LLVMValueRef gsvs_ring_stride; - LLVMValueRef gsvs_num_entries; - LLVMValueRef gs2vs_offset; - LLVMValueRef gs_wave_id; - LLVMValueRef gs_vtx_offset[6]; - - LLVMValueRef esgs_ring; - LLVMValueRef gsvs_ring; - LLVMValueRef hs_ring_tess_offchip; - LLVMValueRef hs_ring_tess_factor; - - LLVMValueRef sample_pos_offset; - LLVMValueRef persp_sample, persp_center, persp_centroid; - LLVMValueRef linear_sample, linear_center, linear_centroid; - - gl_shader_stage stage; - - LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4]; - - uint64_t input_mask; - uint64_t output_mask; - uint8_t num_output_clips; - uint8_t num_output_culls; - - bool is_gs_copy_shader; - LLVMValueRef gs_next_vertex; - unsigned gs_max_out_vertices; - - unsigned tes_primitive_mode; - uint64_t tess_outputs_written; - uint64_t tess_patch_outputs_written; - - uint32_t tcs_patch_outputs_read; - uint64_t tcs_outputs_read; - uint32_t tcs_vertices_per_patch; -}; - -static inline struct radv_shader_context * -radv_shader_context_from_abi(struct ac_shader_abi *abi) -{ - struct radv_shader_context *ctx = NULL; - return container_of(abi, ctx, abi); -} - static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, const nir_deref_var *deref, enum ac_descriptor_type desc_type, const nir_tex_instr *instr, bool 
image, bool write); -static unsigned shader_io_get_unique_index(gl_varying_slot slot) +static void +build_store_values_extended(struct ac_llvm_context *ac, + LLVMValueRef *values, + unsigned value_count, + unsigned value_stride, + LLVMValueRef vec) { - /* handle patch indices separate */ - if (slot == VARYING_SLOT_TESS_LEVEL_OUTER) - return 0; - if (slot == VARYING_SLOT_TESS_LEVEL_INNER) - return 1; - if (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_TESS_MAX) - return 2 + (slot - VARYING_SLOT_PATCH0); + LLVMBuilderRef builder = ac->builder; + unsigned i; - if (slot == VARYING_SLOT_POS) - return 0; - if (slot == VARYING_SLOT_PSIZ) - return 1; - if (slot == VARYING_SLOT_CLIP_DIST0) - return 2; - /* 3 is reserved for clip dist as well */ - if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31) - return 4 + (slot - VARYING_SLOT_VAR0); - unreachable("illegal slot in get unique index\n"); + for (i = 0; i < value_count; i++) { + LLVMValueRef ptr = values[i * value_stride]; + LLVMValueRef index = LLVMConstInt(ac->i32, i, false); + LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, ""); + LLVMBuildStore(builder, value, ptr); + } } -static void set_llvm_calling_convention(LLVMValueRef func, - gl_shader_stage stage) +static LLVMTypeRef get_def_type(struct ac_nir_context *ctx, + const nir_ssa_def *def) { - enum radeon_llvm_calling_convention calling_conv; - - switch (stage) { - case MESA_SHADER_VERTEX: - case MESA_SHADER_TESS_EVAL: - calling_conv = RADEON_LLVM_AMDGPU_VS; - break; - case MESA_SHADER_GEOMETRY: - calling_conv = RADEON_LLVM_AMDGPU_GS; - break; - case MESA_SHADER_TESS_CTRL: - calling_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS : RADEON_LLVM_AMDGPU_VS; - break; - case MESA_SHADER_FRAGMENT: - calling_conv = RADEON_LLVM_AMDGPU_PS; - break; - case MESA_SHADER_COMPUTE: - calling_conv = RADEON_LLVM_AMDGPU_CS; - break; - default: - unreachable("Unhandle shader type"); + LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size); + if (def->num_components > 1) { + type = LLVMVectorType(type, def->num_components); } - - LLVMSetFunctionCallConv(func, calling_conv); + return type; } -#define MAX_ARGS 23 -struct arg_info { - LLVMTypeRef types[MAX_ARGS]; - LLVMValueRef *assign[MAX_ARGS]; - unsigned array_params_mask; - uint8_t count; - uint8_t sgpr_count; - uint8_t num_sgprs_used; - uint8_t num_vgprs_used; -}; - -enum ac_arg_regfile { - ARG_SGPR, - ARG_VGPR, -}; - -static void -add_arg(struct arg_info *info, enum ac_arg_regfile regfile, LLVMTypeRef type, - LLVMValueRef *param_ptr) +static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src) { - assert(info->count < MAX_ARGS); - - info->assign[info->count] = param_ptr; - info->types[info->count] = type; - info->count++; - - if (regfile == ARG_SGPR) { - info->num_sgprs_used += ac_get_type_size(type) / 4; - info->sgpr_count++; - } else { - assert(regfile == ARG_VGPR); - info->num_vgprs_used += ac_get_type_size(type) / 4; - } + assert(src.is_ssa); + struct hash_entry *entry = _mesa_hash_table_search(nir->defs, src.ssa); + return (LLVMValueRef)entry->data; } -static inline void -add_array_arg(struct arg_info *info, LLVMTypeRef type, LLVMValueRef *param_ptr) +static LLVMValueRef +get_memory_ptr(struct ac_nir_context *ctx, nir_src src) { - info->array_params_mask |= (1 << info->count); - add_arg(info, ARG_SGPR, type, param_ptr); + LLVMValueRef ptr = get_src(ctx, src); + ptr = LLVMBuildGEP(ctx->ac.builder, ctx->ac.lds, &ptr, 1, ""); + int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); + + 
return LLVMBuildBitCast(ctx->ac.builder, ptr, + LLVMPointerType(ctx->ac.i32, addr_space), ""); } -static void assign_arguments(LLVMValueRef main_function, - struct arg_info *info) +static LLVMBasicBlockRef get_block(struct ac_nir_context *nir, + const struct nir_block *b) { - unsigned i; - for (i = 0; i < info->count; i++) { - if (info->assign[i]) - *info->assign[i] = LLVMGetParam(main_function, i); - } + struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b); + return (LLVMBasicBlockRef)entry->data; } -static LLVMValueRef -create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module, - LLVMBuilderRef builder, LLVMTypeRef *return_types, - unsigned num_return_elems, - struct arg_info *args, - unsigned max_workgroup_size, - bool unsafe_math) +static LLVMValueRef get_alu_src(struct ac_nir_context *ctx, + nir_alu_src src, + unsigned num_components) { - LLVMTypeRef main_function_type, ret_type; - LLVMBasicBlockRef main_function_body; + LLVMValueRef value = get_src(ctx, src.src); + bool need_swizzle = false; - if (num_return_elems) - ret_type = LLVMStructTypeInContext(ctx, return_types, - num_return_elems, true); - else - ret_type = LLVMVoidTypeInContext(ctx); - - /* Setup the function */ - main_function_type = - LLVMFunctionType(ret_type, args->types, args->count, 0); - LLVMValueRef main_function = - LLVMAddFunction(module, "main", main_function_type); - main_function_body = - LLVMAppendBasicBlockInContext(ctx, main_function, "main_body"); - LLVMPositionBuilderAtEnd(builder, main_function_body); - - LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS); - for (unsigned i = 0; i < args->sgpr_count; ++i) { - ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG); - - if (args->array_params_mask & (1 << i)) { - LLVMValueRef P = LLVMGetParam(main_function, i); - ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_NOALIAS); - ac_add_attr_dereferenceable(P, UINT64_MAX); - } + assert(value); + unsigned src_components = ac_get_llvm_num_components(value); + for (unsigned i = 0; i < num_components; ++i) { + assert(src.swizzle[i] < src_components); + if (src.swizzle[i] != i) + need_swizzle = true; } - if (max_workgroup_size) { - ac_llvm_add_target_dep_function_attr(main_function, - "amdgpu-max-work-group-size", - max_workgroup_size); - } - if (unsafe_math) { - /* These were copied from some LLVM test. 
*/ - LLVMAddTargetDependentFunctionAttr(main_function, - "less-precise-fpmad", - "true"); - LLVMAddTargetDependentFunctionAttr(main_function, - "no-infs-fp-math", - "true"); - LLVMAddTargetDependentFunctionAttr(main_function, - "no-nans-fp-math", - "true"); - LLVMAddTargetDependentFunctionAttr(main_function, - "unsafe-fp-math", - "true"); - LLVMAddTargetDependentFunctionAttr(main_function, - "no-signed-zeros-fp-math", - "true"); - } - return main_function; -} + if (need_swizzle || num_components != src_components) { + LLVMValueRef masks[] = { + LLVMConstInt(ctx->ac.i32, src.swizzle[0], false), + LLVMConstInt(ctx->ac.i32, src.swizzle[1], false), + LLVMConstInt(ctx->ac.i32, src.swizzle[2], false), + LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)}; -static LLVMValueRef get_rel_patch_id(struct radv_shader_context *ctx) -{ - switch (ctx->stage) { - case MESA_SHADER_TESS_CTRL: - return ac_unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 0, 8); - case MESA_SHADER_TESS_EVAL: - return ctx->tes_rel_patch_id; - break; - default: - unreachable("Illegal stage"); + if (src_components > 1 && num_components == 1) { + value = LLVMBuildExtractElement(ctx->ac.builder, value, + masks[0], ""); + } else if (src_components == 1 && num_components > 1) { + LLVMValueRef values[] = {value, value, value, value}; + value = ac_build_gather_values(&ctx->ac, values, num_components); + } else { + LLVMValueRef swizzle = LLVMConstVector(masks, num_components); + value = LLVMBuildShuffleVector(ctx->ac.builder, value, value, + swizzle, ""); + } } + assert(!src.negate); + assert(!src.abs); + return value; } -/* Tessellation shaders pass outputs to the next shader using LDS. - * - * LS outputs = TCS inputs - * TCS outputs = TES inputs - * - * The LDS layout is: - * - TCS inputs for patch 0 - * - TCS inputs for patch 1 - * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2) - * - ... - * - TCS outputs for patch 0 = get_tcs_out_patch0_offset - * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset - * - TCS outputs for patch 1 - * - Per-patch TCS outputs for patch 1 - * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2) - * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2) - * - ... - * - * All three shaders VS(LS), TCS, TES share the same LDS space. 
- */ -static LLVMValueRef -get_tcs_in_patch_stride(struct radv_shader_context *ctx) +static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx, + LLVMIntPredicate pred, LLVMValueRef src0, + LLVMValueRef src1) { - if (ctx->stage == MESA_SHADER_VERTEX) - return ac_unpack_param(&ctx->ac, ctx->ls_out_layout, 0, 13); - else if (ctx->stage == MESA_SHADER_TESS_CTRL) - return ac_unpack_param(&ctx->ac, ctx->tcs_in_layout, 0, 13); - else { - assert(0); - return NULL; - } + LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, ""); + return LLVMBuildSelect(ctx->builder, result, + LLVMConstInt(ctx->i32, 0xFFFFFFFF, false), + ctx->i32_0, ""); } -static LLVMValueRef -get_tcs_out_patch_stride(struct radv_shader_context *ctx) +static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx, + LLVMRealPredicate pred, LLVMValueRef src0, + LLVMValueRef src1) { - return ac_unpack_param(&ctx->ac, ctx->tcs_out_layout, 0, 13); + LLVMValueRef result; + src0 = ac_to_float(ctx, src0); + src1 = ac_to_float(ctx, src1); + result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, ""); + return LLVMBuildSelect(ctx->builder, result, + LLVMConstInt(ctx->i32, 0xFFFFFFFF, false), + ctx->i32_0, ""); } -static LLVMValueRef -get_tcs_out_vertex_stride(struct radv_shader_context *ctx) +static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx, + const char *intrin, + LLVMTypeRef result_type, + LLVMValueRef src0) { - return ac_unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8); -} + char name[64]; + LLVMValueRef params[] = { + ac_to_float(ctx, src0), + }; -static LLVMValueRef -get_tcs_out_patch0_offset(struct radv_shader_context *ctx) -{ - return LLVMBuildMul(ctx->ac.builder, - ac_unpack_param(&ctx->ac, ctx->tcs_out_offsets, 0, 16), - LLVMConstInt(ctx->ac.i32, 4, false), ""); + MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin, + ac_get_elem_bits(ctx, result_type)); + assert(length < sizeof(name)); + return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE); } -static LLVMValueRef -get_tcs_out_patch0_patch_data_offset(struct radv_shader_context *ctx) +static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx, + const char *intrin, + LLVMTypeRef result_type, + LLVMValueRef src0, LLVMValueRef src1) { - return LLVMBuildMul(ctx->ac.builder, - ac_unpack_param(&ctx->ac, ctx->tcs_out_offsets, 16, 16), - LLVMConstInt(ctx->ac.i32, 4, false), ""); + char name[64]; + LLVMValueRef params[] = { + ac_to_float(ctx, src0), + ac_to_float(ctx, src1), + }; + + MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin, + ac_get_elem_bits(ctx, result_type)); + assert(length < sizeof(name)); + return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE); } -static LLVMValueRef -get_tcs_in_current_patch_offset(struct radv_shader_context *ctx) +static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx, + const char *intrin, + LLVMTypeRef result_type, + LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2) { - LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx); - LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); + char name[64]; + LLVMValueRef params[] = { + ac_to_float(ctx, src0), + ac_to_float(ctx, src1), + ac_to_float(ctx, src2), + }; - return LLVMBuildMul(ctx->ac.builder, patch_stride, rel_patch_id, ""); + MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin, + ac_get_elem_bits(ctx, result_type)); + assert(length < sizeof(name)); + return ac_build_intrinsic(ctx, name, 
result_type, params, 3, AC_FUNC_ATTR_READNONE); } -static LLVMValueRef -get_tcs_out_current_patch_offset(struct radv_shader_context *ctx) +static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx, + LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2) { - LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx); - LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx); - LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); - - return LLVMBuildAdd(ctx->ac.builder, patch0_offset, - LLVMBuildMul(ctx->ac.builder, patch_stride, - rel_patch_id, ""), - ""); + LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, + ctx->i32_0, ""); + return LLVMBuildSelect(ctx->builder, v, ac_to_integer(ctx, src1), + ac_to_integer(ctx, src2), ""); } -static LLVMValueRef -get_tcs_out_current_patch_data_offset(struct radv_shader_context *ctx) +static LLVMValueRef emit_minmax_int(struct ac_llvm_context *ctx, + LLVMIntPredicate pred, + LLVMValueRef src0, LLVMValueRef src1) { - LLVMValueRef patch0_patch_data_offset = - get_tcs_out_patch0_patch_data_offset(ctx); - LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx); - LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); - - return LLVMBuildAdd(ctx->ac.builder, patch0_patch_data_offset, - LLVMBuildMul(ctx->ac.builder, patch_stride, - rel_patch_id, ""), - ""); -} + return LLVMBuildSelect(ctx->builder, + LLVMBuildICmp(ctx->builder, pred, src0, src1, ""), + src0, + src1, ""); -static void -set_loc(struct ac_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs, - uint32_t indirect_offset) +} +static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx, + LLVMValueRef src0) { - ud_info->sgpr_idx = *sgpr_idx; - ud_info->num_sgprs = num_sgprs; - ud_info->indirect = indirect_offset > 0; - ud_info->indirect_offset = indirect_offset; - *sgpr_idx += num_sgprs; + return emit_minmax_int(ctx, LLVMIntSGT, src0, + LLVMBuildNeg(ctx->builder, src0, "")); } -static void -set_loc_shader(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx, - uint8_t num_sgprs) +static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx, + const char *intrin, + LLVMValueRef src0, LLVMValueRef src1) { - struct ac_userdata_info *ud_info = - &ctx->shader_info->user_sgprs_locs.shader_data[idx]; - assert(ud_info); + LLVMTypeRef ret_type; + LLVMTypeRef types[] = { ctx->i32, ctx->i1 }; + LLVMValueRef res; + LLVMValueRef params[] = { src0, src1 }; + ret_type = LLVMStructTypeInContext(ctx->context, types, + 2, true); - set_loc(ud_info, sgpr_idx, num_sgprs, 0); + res = ac_build_intrinsic(ctx, intrin, ret_type, + params, 2, AC_FUNC_ATTR_READNONE); + + res = LLVMBuildExtractValue(ctx->builder, res, 1, ""); + res = LLVMBuildZExt(ctx->builder, res, ctx->i32, ""); + return res; } -static void -set_loc_desc(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx, - uint32_t indirect_offset) +static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx, + LLVMValueRef src0) { - struct ac_userdata_info *ud_info = - &ctx->shader_info->user_sgprs_locs.descriptor_sets[idx]; - assert(ud_info); - - set_loc(ud_info, sgpr_idx, 2, indirect_offset); + return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), ""); } -struct user_sgpr_info { - bool need_ring_offsets; - uint8_t sgpr_count; - bool indirect_all_descriptor_sets; -}; - -static bool needs_view_index_sgpr(struct radv_shader_context *ctx, - gl_shader_stage stage) +static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx, + LLVMValueRef src0) { - switch (stage) { - case 
MESA_SHADER_VERTEX: - if (ctx->shader_info->info.needs_multiview_view_index || - (!ctx->options->key.vs.as_es && !ctx->options->key.vs.as_ls && ctx->options->key.has_multiview_view_index)) - return true; - break; - case MESA_SHADER_TESS_EVAL: - if (ctx->shader_info->info.needs_multiview_view_index || (!ctx->options->key.tes.as_es && ctx->options->key.has_multiview_view_index)) - return true; - break; - case MESA_SHADER_GEOMETRY: - case MESA_SHADER_TESS_CTRL: - if (ctx->shader_info->info.needs_multiview_view_index) - return true; - break; - default: - break; - } - return false; + src0 = ac_to_float(ctx, src0); + LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0)); + return LLVMBuildSExt(ctx->builder, + LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, zero, ""), + ctx->i32, ""); } -static uint8_t -count_vs_user_sgprs(struct radv_shader_context *ctx) +static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx, + LLVMValueRef src0, + unsigned bitsize) { - uint8_t count = 0; + LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, ""); - count += ctx->shader_info->info.vs.has_vertex_buffers ? 2 : 0; - count += ctx->shader_info->info.vs.needs_draw_id ? 3 : 2; + if (bitsize == 32) + return result; - return count; + return LLVMBuildZExt(ctx->builder, result, ctx->i64, ""); } -static void allocate_user_sgprs(struct radv_shader_context *ctx, - gl_shader_stage stage, - bool has_previous_stage, - gl_shader_stage previous_stage, - bool needs_view_index, - struct user_sgpr_info *user_sgpr_info) +static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx, + LLVMValueRef src0) { - memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info)); - - /* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */ - if (stage == MESA_SHADER_GEOMETRY || - stage == MESA_SHADER_VERTEX || - stage == MESA_SHADER_TESS_CTRL || - stage == MESA_SHADER_TESS_EVAL || - ctx->is_gs_copy_shader) - user_sgpr_info->need_ring_offsets = true; + LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0)); + return LLVMBuildSExt(ctx->builder, + LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, zero, ""), + ctx->i32, ""); +} - if (stage == MESA_SHADER_FRAGMENT && - ctx->shader_info->info.ps.needs_sample_positions) - user_sgpr_info->need_ring_offsets = true; +static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx, + LLVMValueRef src0) +{ + LLVMValueRef result; + LLVMValueRef cond = NULL; - /* 2 user sgprs will nearly always be allocated for scratch/rings */ - if (ctx->options->supports_spill || user_sgpr_info->need_ring_offsets) { - user_sgpr_info->sgpr_count += 2; - } + src0 = ac_to_float(ctx, src0); + result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, ""); - switch (stage) { - case MESA_SHADER_COMPUTE: - if (ctx->shader_info->info.cs.uses_grid_size) - user_sgpr_info->sgpr_count += 3; - break; - case MESA_SHADER_FRAGMENT: - user_sgpr_info->sgpr_count += ctx->shader_info->info.ps.needs_sample_positions; - break; - case MESA_SHADER_VERTEX: - if (!ctx->is_gs_copy_shader) - user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx); - if (ctx->options->key.vs.as_ls) - user_sgpr_info->sgpr_count++; - break; - case MESA_SHADER_TESS_CTRL: - if (has_previous_stage) { - if (previous_stage == MESA_SHADER_VERTEX) - user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx); - user_sgpr_info->sgpr_count++; - } - user_sgpr_info->sgpr_count += 4; - break; - case MESA_SHADER_TESS_EVAL: - user_sgpr_info->sgpr_count += 1; - break; - case MESA_SHADER_GEOMETRY: - if (has_previous_stage) { - if (previous_stage == MESA_SHADER_VERTEX) { - 
user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx); - } else { - user_sgpr_info->sgpr_count++; - } - } - user_sgpr_info->sgpr_count += 2; - break; - default: - break; + if (ctx->chip_class >= VI) { + LLVMValueRef args[2]; + /* Check if the result is a denormal - and flush to 0 if so. */ + args[0] = result; + args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false); + cond = ac_build_intrinsic(ctx, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE); } - if (needs_view_index) - user_sgpr_info->sgpr_count++; - - if (ctx->shader_info->info.loads_push_constants) - user_sgpr_info->sgpr_count += 2; - - uint32_t available_sgprs = ctx->options->chip_class >= GFX9 ? 32 : 16; - uint32_t remaining_sgprs = available_sgprs - user_sgpr_info->sgpr_count; + /* need to convert back up to f32 */ + result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, ""); - if (remaining_sgprs / 2 < util_bitcount(ctx->shader_info->info.desc_set_used_mask)) { - user_sgpr_info->sgpr_count += 2; - user_sgpr_info->indirect_all_descriptor_sets = true; - } else { - user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2; + if (ctx->chip_class >= VI) + result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, ""); + else { + /* for SI/CIK */ + /* 0x38800000 is smallest half float value (2^-14) in 32-bit float, + * so compare the result and flush to 0 if it's smaller. + */ + LLVMValueRef temp, cond2; + temp = emit_intrin_1f_param(ctx, "llvm.fabs", ctx->f32, result); + cond = LLVMBuildFCmp(ctx->builder, LLVMRealUGT, + LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""), + temp, ""); + cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealUNE, + temp, ctx->f32_0, ""); + cond = LLVMBuildAnd(ctx->builder, cond, cond2, ""); + result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, ""); } + return result; } -static void -declare_global_input_sgprs(struct radv_shader_context *ctx, - gl_shader_stage stage, - bool has_previous_stage, - gl_shader_stage previous_stage, - const struct user_sgpr_info *user_sgpr_info, - struct arg_info *args, - LLVMValueRef *desc_sets) +static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx, + LLVMValueRef src0, LLVMValueRef src1) { - LLVMTypeRef type = ac_array_in_const_addr_space(ctx->ac.i8); - unsigned num_sets = ctx->options->layout ? 
- ctx->options->layout->num_sets : 0; - unsigned stage_mask = 1 << stage; - - if (has_previous_stage) - stage_mask |= 1 << previous_stage; - - /* 1 for each descriptor set */ - if (!user_sgpr_info->indirect_all_descriptor_sets) { - for (unsigned i = 0; i < num_sets; ++i) { - if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) && - ctx->options->layout->set[i].layout->shader_stages & stage_mask) { - add_array_arg(args, type, - &ctx->descriptor_sets[i]); - } - } - } else { - add_array_arg(args, ac_array_in_const_addr_space(type), desc_sets); - } + LLVMValueRef dst64, result; + src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, ""); + src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, ""); - if (ctx->shader_info->info.loads_push_constants) { - /* 1 for push constants and dynamic descriptors */ - add_array_arg(args, type, &ctx->abi.push_constants); - } + dst64 = LLVMBuildMul(ctx->builder, src0, src1, ""); + dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), ""); + result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, ""); + return result; } -static void -declare_vs_specific_input_sgprs(struct radv_shader_context *ctx, - gl_shader_stage stage, - bool has_previous_stage, - gl_shader_stage previous_stage, - struct arg_info *args) +static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx, + LLVMValueRef src0, LLVMValueRef src1) { - if (!ctx->is_gs_copy_shader && - (stage == MESA_SHADER_VERTEX || - (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { - if (ctx->shader_info->info.vs.has_vertex_buffers) { - add_arg(args, ARG_SGPR, ac_array_in_const_addr_space(ctx->ac.v4i32), - &ctx->vertex_buffers); - } - add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.base_vertex); - add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.start_instance); - if (ctx->shader_info->info.vs.needs_draw_id) { - add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.draw_id); - } - } -} + LLVMValueRef dst64, result; + src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, ""); + src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, ""); -static void -declare_vs_input_vgprs(struct radv_shader_context *ctx, struct arg_info *args) -{ - add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.vertex_id); - if (!ctx->is_gs_copy_shader) { - if (ctx->options->key.vs.as_ls) { - add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->rel_auto_id); - add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id); - } else { - add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id); - add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->vs_prim_id); - } - add_arg(args, ARG_VGPR, ctx->ac.i32, NULL); /* unused */ - } + dst64 = LLVMBuildMul(ctx->builder, src0, src1, ""); + dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), ""); + result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, ""); + return result; } -static void -declare_tes_input_vgprs(struct radv_shader_context *ctx, struct arg_info *args) +static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx, + bool is_signed, + const LLVMValueRef srcs[3]) { - add_arg(args, ARG_VGPR, ctx->ac.f32, &ctx->tes_u); - add_arg(args, ARG_VGPR, ctx->ac.f32, &ctx->tes_v); - add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->tes_rel_patch_id); - add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.tes_patch_id); + LLVMValueRef result; + LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), ""); + + result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed); + result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, 
""); + return result; } -static void -set_global_input_locs(struct radv_shader_context *ctx, gl_shader_stage stage, - bool has_previous_stage, gl_shader_stage previous_stage, - const struct user_sgpr_info *user_sgpr_info, - LLVMValueRef desc_sets, uint8_t *user_sgpr_idx) +static LLVMValueRef emit_bitfield_insert(struct ac_llvm_context *ctx, + LLVMValueRef src0, LLVMValueRef src1, + LLVMValueRef src2, LLVMValueRef src3) { - unsigned num_sets = ctx->options->layout ? - ctx->options->layout->num_sets : 0; - unsigned stage_mask = 1 << stage; - - if (has_previous_stage) - stage_mask |= 1 << previous_stage; - - if (!user_sgpr_info->indirect_all_descriptor_sets) { - for (unsigned i = 0; i < num_sets; ++i) { - if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) && - ctx->options->layout->set[i].layout->shader_stages & stage_mask) { - set_loc_desc(ctx, i, user_sgpr_idx, 0); - } else - ctx->descriptor_sets[i] = NULL; - } - } else { - set_loc_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS, - user_sgpr_idx, 2); - - for (unsigned i = 0; i < num_sets; ++i) { - if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) && - ctx->options->layout->set[i].layout->shader_stages & stage_mask) { - set_loc_desc(ctx, i, user_sgpr_idx, i * 8); - ctx->descriptor_sets[i] = - ac_build_load_to_sgpr(&ctx->ac, - desc_sets, - LLVMConstInt(ctx->ac.i32, i, false)); - - } else - ctx->descriptor_sets[i] = NULL; - } - ctx->shader_info->need_indirect_descriptor_sets = true; - } + LLVMValueRef bfi_args[3], result; - if (ctx->shader_info->info.loads_push_constants) { - set_loc_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2); - } -} + bfi_args[0] = LLVMBuildShl(ctx->builder, + LLVMBuildSub(ctx->builder, + LLVMBuildShl(ctx->builder, + ctx->i32_1, + src3, ""), + ctx->i32_1, ""), + src2, ""); + bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, ""); + bfi_args[2] = src0; -static void -set_vs_specific_input_locs(struct radv_shader_context *ctx, - gl_shader_stage stage, bool has_previous_stage, - gl_shader_stage previous_stage, - uint8_t *user_sgpr_idx) -{ - if (!ctx->is_gs_copy_shader && - (stage == MESA_SHADER_VERTEX || - (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { - if (ctx->shader_info->info.vs.has_vertex_buffers) { - set_loc_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, - user_sgpr_idx, 2); - } + LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), ""); - unsigned vs_num = 2; - if (ctx->shader_info->info.vs.needs_draw_id) - vs_num++; + /* Calculate: + * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2) + * Use the right-hand side, which the LLVM backend can convert to V_BFI. 
+ */ + result = LLVMBuildXor(ctx->builder, bfi_args[2], + LLVMBuildAnd(ctx->builder, bfi_args[0], + LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), ""); - set_loc_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, - user_sgpr_idx, vs_num); - } + result = LLVMBuildSelect(ctx->builder, icond, src1, result, ""); + return result; } -static void create_function(struct radv_shader_context *ctx, - gl_shader_stage stage, - bool has_previous_stage, - gl_shader_stage previous_stage) +static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx, + LLVMValueRef src0) { - uint8_t user_sgpr_idx; - struct user_sgpr_info user_sgpr_info; - struct arg_info args = {}; - LLVMValueRef desc_sets; - bool needs_view_index = needs_view_index_sgpr(ctx, stage); - allocate_user_sgprs(ctx, stage, has_previous_stage, - previous_stage, needs_view_index, &user_sgpr_info); - - if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) { - add_arg(&args, ARG_SGPR, ac_array_in_const_addr_space(ctx->ac.v4i32), - &ctx->ring_offsets); - } - - switch (stage) { - case MESA_SHADER_COMPUTE: - declare_global_input_sgprs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_info, - &args, &desc_sets); + LLVMValueRef comp[2]; - if (ctx->shader_info->info.cs.uses_grid_size) { - add_arg(&args, ARG_SGPR, ctx->ac.v3i32, - &ctx->abi.num_work_groups); - } + src0 = ac_to_float(ctx, src0); + comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, ""); + comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, ""); - for (int i = 0; i < 3; i++) { - ctx->abi.workgroup_ids[i] = NULL; - if (ctx->shader_info->info.cs.uses_block_id[i]) { - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->abi.workgroup_ids[i]); - } - } + return ac_build_cvt_pkrtz_f16(ctx, comp); +} - if (ctx->shader_info->info.cs.uses_local_invocation_idx) - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->abi.tg_size); - add_arg(&args, ARG_VGPR, ctx->ac.v3i32, - &ctx->abi.local_invocation_ids); - break; - case MESA_SHADER_VERTEX: - declare_global_input_sgprs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_info, - &args, &desc_sets); - declare_vs_specific_input_sgprs(ctx, stage, has_previous_stage, - previous_stage, &args); - - if (needs_view_index) - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->abi.view_index); - if (ctx->options->key.vs.as_es) - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->es2gs_offset); - else if (ctx->options->key.vs.as_ls) - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->ls_out_layout); - - declare_vs_input_vgprs(ctx, &args); - break; - case MESA_SHADER_TESS_CTRL: - if (has_previous_stage) { - // First 6 system regs - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->merged_wave_info); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tess_factor_offset); - - add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // scratch offset - add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown - add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown - - declare_global_input_sgprs(ctx, stage, - has_previous_stage, - previous_stage, - &user_sgpr_info, &args, - &desc_sets); - declare_vs_specific_input_sgprs(ctx, stage, - has_previous_stage, - previous_stage, &args); - - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->ls_out_layout); - - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_offchip_layout); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_out_offsets); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_out_layout); - add_arg(&args, ARG_SGPR, 
ctx->ac.i32, - &ctx->tcs_in_layout); - if (needs_view_index) - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->abi.view_index); - - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->abi.tcs_patch_id); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->abi.tcs_rel_ids); - - declare_vs_input_vgprs(ctx, &args); - } else { - declare_global_input_sgprs(ctx, stage, - has_previous_stage, - previous_stage, - &user_sgpr_info, &args, - &desc_sets); - - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_offchip_layout); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_out_offsets); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_out_layout); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_in_layout); - if (needs_view_index) - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->abi.view_index); - - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tess_factor_offset); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->abi.tcs_patch_id); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->abi.tcs_rel_ids); - } - break; - case MESA_SHADER_TESS_EVAL: - declare_global_input_sgprs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_info, - &args, &desc_sets); - - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->tcs_offchip_layout); - if (needs_view_index) - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->abi.view_index); - - if (ctx->options->key.tes.as_es) { - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); - add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->es2gs_offset); - } else { - add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); - } - declare_tes_input_vgprs(ctx, &args); - break; - case MESA_SHADER_GEOMETRY: - if (has_previous_stage) { - // First 6 system regs - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->gs2vs_offset); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->merged_wave_info); - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); - - add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // scratch offset - add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown - add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown - - declare_global_input_sgprs(ctx, stage, - has_previous_stage, - previous_stage, - &user_sgpr_info, &args, - &desc_sets); - - if (previous_stage == MESA_SHADER_TESS_EVAL) { - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->tcs_offchip_layout); - } else { - declare_vs_specific_input_sgprs(ctx, stage, - has_previous_stage, - previous_stage, - &args); - } +static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx, + LLVMValueRef src0) +{ + LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false); + LLVMValueRef temps[2], result, val; + int i; - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->gsvs_ring_stride); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->gsvs_num_entries); - if (needs_view_index) - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->abi.view_index); - - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->gs_vtx_offset[0]); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->gs_vtx_offset[2]); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->abi.gs_prim_id); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->abi.gs_invocation_id); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->gs_vtx_offset[4]); - - if (previous_stage == MESA_SHADER_VERTEX) { - declare_vs_input_vgprs(ctx, &args); - } else { - declare_tes_input_vgprs(ctx, &args); - } - } else { - declare_global_input_sgprs(ctx, stage, - has_previous_stage, - previous_stage, - 
&user_sgpr_info, &args, - &desc_sets); - - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->gsvs_ring_stride); - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->gsvs_num_entries); - if (needs_view_index) - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->abi.view_index); - - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->gs2vs_offset); - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->gs_wave_id); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->gs_vtx_offset[0]); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->gs_vtx_offset[1]); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->abi.gs_prim_id); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->gs_vtx_offset[2]); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->gs_vtx_offset[3]); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->gs_vtx_offset[4]); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->gs_vtx_offset[5]); - add_arg(&args, ARG_VGPR, ctx->ac.i32, - &ctx->abi.gs_invocation_id); - } - break; - case MESA_SHADER_FRAGMENT: - declare_global_input_sgprs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_info, - &args, &desc_sets); - - if (ctx->shader_info->info.ps.needs_sample_positions) - add_arg(&args, ARG_SGPR, ctx->ac.i32, - &ctx->sample_pos_offset); - - add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->abi.prim_mask); - add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_sample); - add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_center); - add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_centroid); - add_arg(&args, ARG_VGPR, ctx->ac.v3i32, NULL); /* persp pull model */ - add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_sample); - add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_center); - add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_centroid); - add_arg(&args, ARG_VGPR, ctx->ac.f32, NULL); /* line stipple tex */ - add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[0]); - add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[1]); - add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[2]); - add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[3]); - add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.front_face); - add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.ancillary); - add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.sample_coverage); - add_arg(&args, ARG_VGPR, ctx->ac.i32, NULL); /* fixed pt */ - break; - default: - unreachable("Shader stage not implemented"); + for (i = 0; i < 2; i++) { + val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0; + val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, ""); + val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, ""); + temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, ""); } - ctx->main_function = create_llvm_function( - ctx->context, ctx->ac.module, ctx->ac.builder, NULL, 0, &args, - ctx->max_workgroup_size, - ctx->options->unsafe_math); - set_llvm_calling_convention(ctx->main_function, stage); - - - ctx->shader_info->num_input_vgprs = 0; - ctx->shader_info->num_input_sgprs = ctx->options->supports_spill ? 
2 : 0; - - ctx->shader_info->num_input_sgprs += args.num_sgprs_used; - - if (ctx->stage != MESA_SHADER_FRAGMENT) - ctx->shader_info->num_input_vgprs = args.num_vgprs_used; - - assign_arguments(ctx->main_function, &args); + result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0], + ctx->i32_0, ""); + result = LLVMBuildInsertElement(ctx->builder, result, temps[1], + ctx->i32_1, ""); + return result; +} - user_sgpr_idx = 0; +static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx, + nir_op op, + LLVMValueRef src0) +{ + unsigned mask; + int idx; + LLVMValueRef result; - if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets) { - set_loc_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS, - &user_sgpr_idx, 2); - if (ctx->options->supports_spill) { - ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr", - LLVMPointerType(ctx->ac.i8, AC_CONST_ADDR_SPACE), - NULL, 0, AC_FUNC_ATTR_READNONE); - ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets, - ac_array_in_const_addr_space(ctx->ac.v4i32), ""); - } - } - - /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including - * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */ - if (has_previous_stage) - user_sgpr_idx = 0; - - set_global_input_locs(ctx, stage, has_previous_stage, previous_stage, - &user_sgpr_info, desc_sets, &user_sgpr_idx); - - switch (stage) { - case MESA_SHADER_COMPUTE: - if (ctx->shader_info->info.cs.uses_grid_size) { - set_loc_shader(ctx, AC_UD_CS_GRID_SIZE, - &user_sgpr_idx, 3); - } - break; - case MESA_SHADER_VERTEX: - set_vs_specific_input_locs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_idx); - if (ctx->abi.view_index) - set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); - if (ctx->options->key.vs.as_ls) { - set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, - &user_sgpr_idx, 1); - } - break; - case MESA_SHADER_TESS_CTRL: - set_vs_specific_input_locs(ctx, stage, has_previous_stage, - previous_stage, &user_sgpr_idx); - if (has_previous_stage) - set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, - &user_sgpr_idx, 1); - set_loc_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, &user_sgpr_idx, 4); - if (ctx->abi.view_index) - set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); - break; - case MESA_SHADER_TESS_EVAL: - set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1); - if (ctx->abi.view_index) - set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); - break; - case MESA_SHADER_GEOMETRY: - if (has_previous_stage) { - if (previous_stage == MESA_SHADER_VERTEX) - set_vs_specific_input_locs(ctx, stage, - has_previous_stage, - previous_stage, - &user_sgpr_idx); - else - set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, - &user_sgpr_idx, 1); - } - set_loc_shader(ctx, AC_UD_GS_VS_RING_STRIDE_ENTRIES, - &user_sgpr_idx, 2); - if (ctx->abi.view_index) - set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1); - break; - case MESA_SHADER_FRAGMENT: - if (ctx->shader_info->info.ps.needs_sample_positions) { - set_loc_shader(ctx, AC_UD_PS_SAMPLE_POS_OFFSET, - &user_sgpr_idx, 1); - } - break; - default: - unreachable("Shader stage not implemented"); - } + if (op == nir_op_fddx_fine) + mask = AC_TID_MASK_LEFT; + else if (op == nir_op_fddy_fine) + mask = AC_TID_MASK_TOP; + else + mask = AC_TID_MASK_TOP_LEFT; - if (stage == MESA_SHADER_TESS_CTRL || - (stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_ls) || - /* GFX9 has the ESGS ring buffer in LDS. 
*/ - (stage == MESA_SHADER_GEOMETRY && has_previous_stage)) { - ac_declare_lds_as_pointer(&ctx->ac); - } + /* for DDX we want to next X pixel, DDY next Y pixel. */ + if (op == nir_op_fddx_fine || + op == nir_op_fddx_coarse || + op == nir_op_fddx) + idx = 1; + else + idx = 2; - ctx->shader_info->num_user_sgprs = user_sgpr_idx; + result = ac_build_ddxy(&ctx->ac, mask, idx, src0); + return result; } -static void -build_store_values_extended(struct ac_llvm_context *ac, - LLVMValueRef *values, - unsigned value_count, - unsigned value_stride, - LLVMValueRef vec) +/* + * this takes an I,J coordinate pair, + * and works out the X and Y derivatives. + * it returns DDX(I), DDX(J), DDY(I), DDY(J). + */ +static LLVMValueRef emit_ddxy_interp( + struct ac_nir_context *ctx, + LLVMValueRef interp_ij) { - LLVMBuilderRef builder = ac->builder; + LLVMValueRef result[4], a; unsigned i; - for (i = 0; i < value_count; i++) { - LLVMValueRef ptr = values[i * value_stride]; - LLVMValueRef index = LLVMConstInt(ac->i32, i, false); - LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, ""); - LLVMBuildStore(builder, value, ptr); - } -} - -static LLVMTypeRef get_def_type(struct ac_nir_context *ctx, - const nir_ssa_def *def) -{ - LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size); - if (def->num_components > 1) { - type = LLVMVectorType(type, def->num_components); + for (i = 0; i < 2; i++) { + a = LLVMBuildExtractElement(ctx->ac.builder, interp_ij, + LLVMConstInt(ctx->ac.i32, i, false), ""); + result[i] = emit_ddxy(ctx, nir_op_fddx, a); + result[2+i] = emit_ddxy(ctx, nir_op_fddy, a); } - return type; + return ac_build_gather_values(&ctx->ac, result, 4); } -static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src) +static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) { - assert(src.is_ssa); - struct hash_entry *entry = _mesa_hash_table_search(nir->defs, src.ssa); - return (LLVMValueRef)entry->data; -} - -static LLVMValueRef -get_memory_ptr(struct ac_nir_context *ctx, nir_src src) -{ - LLVMValueRef ptr = get_src(ctx, src); - ptr = LLVMBuildGEP(ctx->ac.builder, ctx->ac.lds, &ptr, 1, ""); - int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); - - return LLVMBuildBitCast(ctx->ac.builder, ptr, - LLVMPointerType(ctx->ac.i32, addr_space), ""); -} - -static LLVMBasicBlockRef get_block(struct ac_nir_context *nir, - const struct nir_block *b) -{ - struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b); - return (LLVMBasicBlockRef)entry->data; -} - -static LLVMValueRef get_alu_src(struct ac_nir_context *ctx, - nir_alu_src src, - unsigned num_components) -{ - LLVMValueRef value = get_src(ctx, src.src); - bool need_swizzle = false; - - assert(value); - unsigned src_components = ac_get_llvm_num_components(value); - for (unsigned i = 0; i < num_components; ++i) { - assert(src.swizzle[i] < src_components); - if (src.swizzle[i] != i) - need_swizzle = true; - } - - if (need_swizzle || num_components != src_components) { - LLVMValueRef masks[] = { - LLVMConstInt(ctx->ac.i32, src.swizzle[0], false), - LLVMConstInt(ctx->ac.i32, src.swizzle[1], false), - LLVMConstInt(ctx->ac.i32, src.swizzle[2], false), - LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)}; + LLVMValueRef src[4], result = NULL; + unsigned num_components = instr->dest.dest.ssa.num_components; + unsigned src_components; + LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa); - if (src_components > 1 && num_components == 1) { - value = LLVMBuildExtractElement(ctx->ac.builder, 
value, - masks[0], ""); - } else if (src_components == 1 && num_components > 1) { - LLVMValueRef values[] = {value, value, value, value}; - value = ac_build_gather_values(&ctx->ac, values, num_components); - } else { - LLVMValueRef swizzle = LLVMConstVector(masks, num_components); - value = LLVMBuildShuffleVector(ctx->ac.builder, value, value, - swizzle, ""); - } + assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src)); + switch (instr->op) { + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + src_components = 1; + break; + case nir_op_pack_half_2x16: + src_components = 2; + break; + case nir_op_unpack_half_2x16: + src_components = 1; + break; + case nir_op_cube_face_coord: + case nir_op_cube_face_index: + src_components = 3; + break; + default: + src_components = num_components; + break; } - assert(!src.negate); - assert(!src.abs); - return value; -} - -static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx, - LLVMIntPredicate pred, LLVMValueRef src0, - LLVMValueRef src1) -{ - LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, ""); - return LLVMBuildSelect(ctx->builder, result, - LLVMConstInt(ctx->i32, 0xFFFFFFFF, false), - ctx->i32_0, ""); -} - -static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx, - LLVMRealPredicate pred, LLVMValueRef src0, - LLVMValueRef src1) -{ - LLVMValueRef result; - src0 = ac_to_float(ctx, src0); - src1 = ac_to_float(ctx, src1); - result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, ""); - return LLVMBuildSelect(ctx->builder, result, - LLVMConstInt(ctx->i32, 0xFFFFFFFF, false), - ctx->i32_0, ""); -} - -static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx, - const char *intrin, - LLVMTypeRef result_type, - LLVMValueRef src0) -{ - char name[64]; - LLVMValueRef params[] = { - ac_to_float(ctx, src0), - }; - - MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin, - ac_get_elem_bits(ctx, result_type)); - assert(length < sizeof(name)); - return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE); -} - -static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx, - const char *intrin, - LLVMTypeRef result_type, - LLVMValueRef src0, LLVMValueRef src1) -{ - char name[64]; - LLVMValueRef params[] = { - ac_to_float(ctx, src0), - ac_to_float(ctx, src1), - }; - - MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin, - ac_get_elem_bits(ctx, result_type)); - assert(length < sizeof(name)); - return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE); -} - -static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx, - const char *intrin, - LLVMTypeRef result_type, - LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2) -{ - char name[64]; - LLVMValueRef params[] = { - ac_to_float(ctx, src0), - ac_to_float(ctx, src1), - ac_to_float(ctx, src2), - }; - - MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin, - ac_get_elem_bits(ctx, result_type)); - assert(length < sizeof(name)); - return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE); -} - -static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx, - LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2) -{ - LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, - ctx->i32_0, ""); - return LLVMBuildSelect(ctx->builder, v, ac_to_integer(ctx, src1), - ac_to_integer(ctx, src2), ""); -} - -static LLVMValueRef emit_minmax_int(struct ac_llvm_context *ctx, - LLVMIntPredicate pred, - 
LLVMValueRef src0, LLVMValueRef src1) -{ - return LLVMBuildSelect(ctx->builder, - LLVMBuildICmp(ctx->builder, pred, src0, src1, ""), - src0, - src1, ""); - -} -static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx, - LLVMValueRef src0) -{ - return emit_minmax_int(ctx, LLVMIntSGT, src0, - LLVMBuildNeg(ctx->builder, src0, "")); -} - -static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx, - const char *intrin, - LLVMValueRef src0, LLVMValueRef src1) -{ - LLVMTypeRef ret_type; - LLVMTypeRef types[] = { ctx->i32, ctx->i1 }; - LLVMValueRef res; - LLVMValueRef params[] = { src0, src1 }; - ret_type = LLVMStructTypeInContext(ctx->context, types, - 2, true); - - res = ac_build_intrinsic(ctx, intrin, ret_type, - params, 2, AC_FUNC_ATTR_READNONE); - - res = LLVMBuildExtractValue(ctx->builder, res, 1, ""); - res = LLVMBuildZExt(ctx->builder, res, ctx->i32, ""); - return res; -} - -static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx, - LLVMValueRef src0) -{ - return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), ""); -} - -static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx, - LLVMValueRef src0) -{ - src0 = ac_to_float(ctx, src0); - LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0)); - return LLVMBuildSExt(ctx->builder, - LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, zero, ""), - ctx->i32, ""); -} - -static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx, - LLVMValueRef src0, - unsigned bitsize) -{ - LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, ""); - - if (bitsize == 32) - return result; - - return LLVMBuildZExt(ctx->builder, result, ctx->i64, ""); -} - -static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx, - LLVMValueRef src0) -{ - LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0)); - return LLVMBuildSExt(ctx->builder, - LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, zero, ""), - ctx->i32, ""); -} - -static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx, - LLVMValueRef src0) -{ - LLVMValueRef result; - LLVMValueRef cond = NULL; - - src0 = ac_to_float(ctx, src0); - result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, ""); - - if (ctx->chip_class >= VI) { - LLVMValueRef args[2]; - /* Check if the result is a denormal - and flush to 0 if so. */ - args[0] = result; - args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false); - cond = ac_build_intrinsic(ctx, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE); - } - - /* need to convert back up to f32 */ - result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, ""); - - if (ctx->chip_class >= VI) - result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, ""); - else { - /* for SI/CIK */ - /* 0x38800000 is smallest half float value (2^-14) in 32-bit float, - * so compare the result and flush to 0 if it's smaller. 
- */ - LLVMValueRef temp, cond2; - temp = emit_intrin_1f_param(ctx, "llvm.fabs", ctx->f32, result); - cond = LLVMBuildFCmp(ctx->builder, LLVMRealUGT, - LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""), - temp, ""); - cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealUNE, - temp, ctx->f32_0, ""); - cond = LLVMBuildAnd(ctx->builder, cond, cond2, ""); - result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, ""); - } - return result; -} - -static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx, - LLVMValueRef src0, LLVMValueRef src1) -{ - LLVMValueRef dst64, result; - src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, ""); - src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, ""); - - dst64 = LLVMBuildMul(ctx->builder, src0, src1, ""); - dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), ""); - result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, ""); - return result; -} - -static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx, - LLVMValueRef src0, LLVMValueRef src1) -{ - LLVMValueRef dst64, result; - src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, ""); - src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, ""); - - dst64 = LLVMBuildMul(ctx->builder, src0, src1, ""); - dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), ""); - result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, ""); - return result; -} - -static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx, - bool is_signed, - const LLVMValueRef srcs[3]) -{ - LLVMValueRef result; - LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), ""); - - result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed); - result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, ""); - return result; -} - -static LLVMValueRef emit_bitfield_insert(struct ac_llvm_context *ctx, - LLVMValueRef src0, LLVMValueRef src1, - LLVMValueRef src2, LLVMValueRef src3) -{ - LLVMValueRef bfi_args[3], result; - - bfi_args[0] = LLVMBuildShl(ctx->builder, - LLVMBuildSub(ctx->builder, - LLVMBuildShl(ctx->builder, - ctx->i32_1, - src3, ""), - ctx->i32_1, ""), - src2, ""); - bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, ""); - bfi_args[2] = src0; - - LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), ""); - - /* Calculate: - * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2) - * Use the right-hand side, which the LLVM backend can convert to V_BFI. - */ - result = LLVMBuildXor(ctx->builder, bfi_args[2], - LLVMBuildAnd(ctx->builder, bfi_args[0], - LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), ""); - - result = LLVMBuildSelect(ctx->builder, icond, src1, result, ""); - return result; -} - -static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx, - LLVMValueRef src0) -{ - LLVMValueRef comp[2]; - - src0 = ac_to_float(ctx, src0); - comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, ""); - comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, ""); - - return ac_build_cvt_pkrtz_f16(ctx, comp); -} - -static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx, - LLVMValueRef src0) -{ - LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false); - LLVMValueRef temps[2], result, val; - int i; - - for (i = 0; i < 2; i++) { - val = i == 1 ? 
LLVMBuildLShr(ctx->builder, src0, const16, "") : src0; - val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, ""); - val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, ""); - temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, ""); - } - - result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0], - ctx->i32_0, ""); - result = LLVMBuildInsertElement(ctx->builder, result, temps[1], - ctx->i32_1, ""); - return result; -} - -static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx, - nir_op op, - LLVMValueRef src0) -{ - unsigned mask; - int idx; - LLVMValueRef result; - - if (op == nir_op_fddx_fine) - mask = AC_TID_MASK_LEFT; - else if (op == nir_op_fddy_fine) - mask = AC_TID_MASK_TOP; - else - mask = AC_TID_MASK_TOP_LEFT; - - /* for DDX we want the next X pixel, for DDY the next Y pixel. */ - if (op == nir_op_fddx_fine || - op == nir_op_fddx_coarse || - op == nir_op_fddx) - idx = 1; - else - idx = 2; - - result = ac_build_ddxy(&ctx->ac, mask, idx, src0); - return result; -} - -/* - * this takes an I,J coordinate pair, - * and works out the X and Y derivatives. - * it returns DDX(I), DDX(J), DDY(I), DDY(J). - */ -static LLVMValueRef emit_ddxy_interp( - struct ac_nir_context *ctx, - LLVMValueRef interp_ij) -{ - LLVMValueRef result[4], a; - unsigned i; - - for (i = 0; i < 2; i++) { - a = LLVMBuildExtractElement(ctx->ac.builder, interp_ij, - LLVMConstInt(ctx->ac.i32, i, false), ""); - result[i] = emit_ddxy(ctx, nir_op_fddx, a); - result[2+i] = emit_ddxy(ctx, nir_op_fddy, a); - } - return ac_build_gather_values(&ctx->ac, result, 4); -} - -static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) -{ - LLVMValueRef src[4], result = NULL; - unsigned num_components = instr->dest.dest.ssa.num_components; - unsigned src_components; - LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa); - - assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src)); - switch (instr->op) { - case nir_op_vec2: - case nir_op_vec3: - case nir_op_vec4: - src_components = 1; - break; - case nir_op_pack_half_2x16: - src_components = 2; - break; - case nir_op_unpack_half_2x16: - src_components = 1; - break; - case nir_op_cube_face_coord: - case nir_op_cube_face_index: - src_components = 3; - break; - default: - src_components = num_components; - break; - } - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) - src[i] = get_alu_src(ctx, instr->src[i], src_components); + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) + src[i] = get_alu_src(ctx, instr->src[i], src_components); switch (instr->op) { case nir_op_fmov: @@ -1726,5010 +680,3134 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) result = ac_build_isign(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); break; - case nir_op_fsign: - src[0] = ac_to_float(&ctx->ac, src[0]); - result = ac_build_fsign(&ctx->ac, src[0], - instr->dest.dest.ssa.bit_size); - break; - case nir_op_ffloor: - result = emit_intrin_1f_param(&ctx->ac, "llvm.floor", - ac_to_float_type(&ctx->ac, def_type), src[0]); - break; - case nir_op_ftrunc: - result = emit_intrin_1f_param(&ctx->ac, "llvm.trunc", - ac_to_float_type(&ctx->ac, def_type), src[0]); - break; - case nir_op_fceil: - result = emit_intrin_1f_param(&ctx->ac, "llvm.ceil", - ac_to_float_type(&ctx->ac, def_type), src[0]); - break; - case nir_op_fround_even: - result = emit_intrin_1f_param(&ctx->ac, "llvm.rint", - ac_to_float_type(&ctx->ac, def_type),src[0]); - break; - case nir_op_ffract: - src[0] = ac_to_float(&ctx->ac, src[0]); -
result = ac_build_fract(&ctx->ac, src[0], - instr->dest.dest.ssa.bit_size); - break; - case nir_op_fsin: - result = emit_intrin_1f_param(&ctx->ac, "llvm.sin", - ac_to_float_type(&ctx->ac, def_type), src[0]); - break; - case nir_op_fcos: - result = emit_intrin_1f_param(&ctx->ac, "llvm.cos", - ac_to_float_type(&ctx->ac, def_type), src[0]); - break; - case nir_op_fsqrt: - result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", - ac_to_float_type(&ctx->ac, def_type), src[0]); - break; - case nir_op_fexp2: - result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2", - ac_to_float_type(&ctx->ac, def_type), src[0]); - break; - case nir_op_flog2: - result = emit_intrin_1f_param(&ctx->ac, "llvm.log2", - ac_to_float_type(&ctx->ac, def_type), src[0]); - break; - case nir_op_frsq: - result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", - ac_to_float_type(&ctx->ac, def_type), src[0]); - result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1, - result); - break; - case nir_op_fmax: - result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", - ac_to_float_type(&ctx->ac, def_type), src[0], src[1]); - if (ctx->ac.chip_class < GFX9 && - instr->dest.dest.ssa.bit_size == 32) { - /* Only pre-GFX9 chips do not flush denorms. */ - result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize", - ac_to_float_type(&ctx->ac, def_type), - result); - } - break; - case nir_op_fmin: - result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum", - ac_to_float_type(&ctx->ac, def_type), src[0], src[1]); - if (ctx->ac.chip_class < GFX9 && - instr->dest.dest.ssa.bit_size == 32) { - /* Only pre-GFX9 chips do not flush denorms. */ - result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize", - ac_to_float_type(&ctx->ac, def_type), - result); - } - break; - case nir_op_ffma: - result = emit_intrin_3f_param(&ctx->ac, "llvm.fmuladd", - ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]); - break; - case nir_op_ldexp: - src[0] = ac_to_float(&ctx->ac, src[0]); - if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 32) - result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2, AC_FUNC_ATTR_READNONE); - else - result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2, AC_FUNC_ATTR_READNONE); - break; - case nir_op_ibitfield_extract: - result = emit_bitfield_extract(&ctx->ac, true, src); - break; - case nir_op_ubitfield_extract: - result = emit_bitfield_extract(&ctx->ac, false, src); - break; - case nir_op_bitfield_insert: - result = emit_bitfield_insert(&ctx->ac, src[0], src[1], src[2], src[3]); - break; - case nir_op_bitfield_reverse: - result = ac_build_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE); - break; - case nir_op_bit_count: - if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 32) - result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE); - else { - result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i64", ctx->ac.i64, src, 1, AC_FUNC_ATTR_READNONE); - result = LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i32, ""); - } - break; - case nir_op_vec2: - case nir_op_vec3: - case nir_op_vec4: - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) - src[i] = ac_to_integer(&ctx->ac, src[i]); - result = ac_build_gather_values(&ctx->ac, src, num_components); - break; - case nir_op_f2i32: - case nir_op_f2i64: - src[0] = ac_to_float(&ctx->ac, src[0]); - result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, ""); - break; - case nir_op_f2u32: - 
case nir_op_f2u64: - src[0] = ac_to_float(&ctx->ac, src[0]); - result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, ""); - break; - case nir_op_i2f32: - case nir_op_i2f64: - src[0] = ac_to_integer(&ctx->ac, src[0]); - result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); - break; - case nir_op_u2f32: - case nir_op_u2f64: - src[0] = ac_to_integer(&ctx->ac, src[0]); - result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); - break; - case nir_op_f2f64: - src[0] = ac_to_float(&ctx->ac, src[0]); - result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); - break; - case nir_op_f2f32: - src[0] = ac_to_float(&ctx->ac, src[0]); - result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); - break; - case nir_op_u2u32: - case nir_op_u2u64: - src[0] = ac_to_integer(&ctx->ac, src[0]); - if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) - result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, ""); - else - result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, ""); - break; - case nir_op_i2i32: - case nir_op_i2i64: - src[0] = ac_to_integer(&ctx->ac, src[0]); - if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) - result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, ""); - else - result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, ""); - break; - case nir_op_bcsel: - result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]); - break; - case nir_op_find_lsb: - src[0] = ac_to_integer(&ctx->ac, src[0]); - result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]); - break; - case nir_op_ufind_msb: - src[0] = ac_to_integer(&ctx->ac, src[0]); - result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32); - break; - case nir_op_ifind_msb: - src[0] = ac_to_integer(&ctx->ac, src[0]); - result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32); - break; - case nir_op_uadd_carry: - src[0] = ac_to_integer(&ctx->ac, src[0]); - src[1] = ac_to_integer(&ctx->ac, src[1]); - result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]); - break; - case nir_op_usub_borrow: - src[0] = ac_to_integer(&ctx->ac, src[0]); - src[1] = ac_to_integer(&ctx->ac, src[1]); - result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]); - break; - case nir_op_b2f: - result = emit_b2f(&ctx->ac, src[0]); - break; - case nir_op_f2b: - result = emit_f2b(&ctx->ac, src[0]); - break; - case nir_op_b2i: - result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); - break; - case nir_op_i2b: - src[0] = ac_to_integer(&ctx->ac, src[0]); - result = emit_i2b(&ctx->ac, src[0]); - break; - case nir_op_fquantize2f16: - result = emit_f2f16(&ctx->ac, src[0]); - break; - case nir_op_umul_high: - src[0] = ac_to_integer(&ctx->ac, src[0]); - src[1] = ac_to_integer(&ctx->ac, src[1]); - result = emit_umul_high(&ctx->ac, src[0], src[1]); - break; - case nir_op_imul_high: - src[0] = ac_to_integer(&ctx->ac, src[0]); - src[1] = ac_to_integer(&ctx->ac, src[1]); - result = emit_imul_high(&ctx->ac, src[0], src[1]); - break; - case nir_op_pack_half_2x16: - result = emit_pack_half_2x16(&ctx->ac, src[0]); - break; - case nir_op_unpack_half_2x16: - result = emit_unpack_half_2x16(&ctx->ac, src[0]); - break; - case nir_op_fddx: - case nir_op_fddy: - case nir_op_fddx_fine: - case nir_op_fddy_fine: - case nir_op_fddx_coarse: - case nir_op_fddy_coarse: - result = emit_ddxy(ctx, instr->op, src[0]); - 
break; - - case nir_op_unpack_64_2x32_split_x: { - assert(ac_get_llvm_num_components(src[0]) == 1); - LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], - ctx->ac.v2i32, - ""); - result = LLVMBuildExtractElement(ctx->ac.builder, tmp, - ctx->ac.i32_0, ""); - break; - } - - case nir_op_unpack_64_2x32_split_y: { - assert(ac_get_llvm_num_components(src[0]) == 1); - LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], - ctx->ac.v2i32, - ""); - result = LLVMBuildExtractElement(ctx->ac.builder, tmp, - ctx->ac.i32_1, ""); - break; - } - - case nir_op_pack_64_2x32_split: { - LLVMValueRef tmp = LLVMGetUndef(ctx->ac.v2i32); - tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp, - src[0], ctx->ac.i32_0, ""); - tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp, - src[1], ctx->ac.i32_1, ""); - result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, ""); - break; - } - - case nir_op_cube_face_coord: { - src[0] = ac_to_float(&ctx->ac, src[0]); - LLVMValueRef results[2]; - LLVMValueRef in[3]; - for (unsigned chan = 0; chan < 3; chan++) - in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan); - results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc", - ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE); - results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc", - ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE); - result = ac_build_gather_values(&ctx->ac, results, 2); - break; - } - - case nir_op_cube_face_index: { - src[0] = ac_to_float(&ctx->ac, src[0]); - LLVMValueRef in[3]; - for (unsigned chan = 0; chan < 3; chan++) - in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan); - result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubeid", - ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE); - break; - } - - default: - fprintf(stderr, "Unknown NIR alu instr: "); - nir_print_instr(&instr->instr, stderr); - fprintf(stderr, "\n"); - abort(); - } - - if (result) { - assert(instr->dest.dest.is_ssa); - result = ac_to_integer(&ctx->ac, result); - _mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa, - result); - } -} - -static void visit_load_const(struct ac_nir_context *ctx, - const nir_load_const_instr *instr) -{ - LLVMValueRef values[4], value = NULL; - LLVMTypeRef element_type = - LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size); - - for (unsigned i = 0; i < instr->def.num_components; ++i) { - switch (instr->def.bit_size) { - case 32: - values[i] = LLVMConstInt(element_type, - instr->value.u32[i], false); - break; - case 64: - values[i] = LLVMConstInt(element_type, - instr->value.u64[i], false); - break; - default: - fprintf(stderr, - "unsupported nir load_const bit_size: %d\n", - instr->def.bit_size); - abort(); - } - } - if (instr->def.num_components > 1) { - value = LLVMConstVector(values, instr->def.num_components); - } else - value = values[0]; - - _mesa_hash_table_insert(ctx->defs, &instr->def, value); -} - -static LLVMValueRef -get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor, bool in_elements) -{ - LLVMValueRef size = - LLVMBuildExtractElement(ctx->ac.builder, descriptor, - LLVMConstInt(ctx->ac.i32, 2, false), ""); - - /* VI only */ - if (ctx->ac.chip_class == VI && in_elements) { - /* On VI, the descriptor contains the size in bytes, - * but TXQ must return the size in elements. - * The stride is always non-zero for resources using TXQ. 
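A scalar sketch of that fixup, assuming (per the shift and mask used just below) that dword1 of the buffer descriptor holds the stride in bits [29:16]:

    #include <stdint.h>

    /* On VI the descriptor stores the size in bytes, but TXQ must return
     * elements, so divide by the stride extracted from dword1. */
    static uint32_t txq_size_in_elements(uint32_t size_bytes, uint32_t dword1)
    {
        uint32_t stride = (dword1 >> 16) & 0x3fff; /* never 0 for TXQ resources */
        return size_bytes / stride;
    }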
- */ - LLVMValueRef stride = - LLVMBuildExtractElement(ctx->ac.builder, descriptor, - ctx->ac.i32_1, ""); - stride = LLVMBuildLShr(ctx->ac.builder, stride, - LLVMConstInt(ctx->ac.i32, 16, false), ""); - stride = LLVMBuildAnd(ctx->ac.builder, stride, - LLVMConstInt(ctx->ac.i32, 0x3fff, false), ""); - - size = LLVMBuildUDiv(ctx->ac.builder, size, stride, ""); - } - return size; -} - -/** - * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with - * intrinsic names). - */ -static void build_int_type_name( - LLVMTypeRef type, - char *buf, unsigned bufsize) -{ - assert(bufsize >= 6); - - if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) - snprintf(buf, bufsize, "v%ui32", - LLVMGetVectorSize(type)); - else - strcpy(buf, "i32"); -} - -static LLVMValueRef radv_lower_gather4_integer(struct ac_llvm_context *ctx, - struct ac_image_args *args, - const nir_tex_instr *instr) -{ - enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type); - LLVMValueRef coord = args->addr; - LLVMValueRef half_texel[2]; - LLVMValueRef compare_cube_wa = NULL; - LLVMValueRef result; - int c; - unsigned coord_vgpr_index = (unsigned)args->offset + (unsigned)args->compare; - - //TODO Rect - { - struct ac_image_args txq_args = { 0 }; - - txq_args.da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE; - txq_args.opcode = ac_image_get_resinfo; - txq_args.dmask = 0xf; - txq_args.addr = ctx->i32_0; - txq_args.resource = args->resource; - LLVMValueRef size = ac_build_image_opcode(ctx, &txq_args); - - for (c = 0; c < 2; c++) { - half_texel[c] = LLVMBuildExtractElement(ctx->builder, size, - LLVMConstInt(ctx->i32, c, false), ""); - half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, ""); - half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]); - half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c], - LLVMConstReal(ctx->f32, -0.5), ""); - } - } - - LLVMValueRef orig_coords = args->addr; - - for (c = 0; c < 2; c++) { - LLVMValueRef tmp; - LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0); - tmp = LLVMBuildExtractElement(ctx->builder, coord, index, ""); - tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, ""); - tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], ""); - tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, ""); - coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, ""); - } - - - /* - * Apparently cube has an issue with integer types that the workaround doesn't solve, - * so this tests if the format is 8_8_8_8 and an integer type, and if so does an alternate - * workaround by sampling using a scaled type and converting. - * This is taken from amdgpu-pro shaders. - */ - /* NOTE this produces some ugly code compared to amdgpu-pro, - * LLVM ends up dumping SGPRs into VGPRs to deal with the compare/select, - * and then reads them back. -pro generates two selects, - * one s_cmp for the descriptor rewriting, - * one v_cmp for the coordinate and result changes.
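For reference, the descriptor rewrite performed below can be sketched as plain bit manipulation. The field position is inferred from the constants this function uses (NUM_FORMAT in bits [29:26] of dword1, so USCALED = 0x2 << 26 = 0x8000000, and so on); this is an illustration, not the driver's register headers:

    #include <stdbool.h>
    #include <stdint.h>

    /* Swap NUM_FORMAT from UINT/SINT to USCALED/SSCALED when the
     * 8_8_8_8 cube gather workaround applies. */
    static uint32_t patch_num_format(uint32_t dword1, bool is_uint, bool needs_wa)
    {
        uint32_t nfmt = needs_wa ? (is_uint ? 0x2u : 0x3u)  /* USCALED : SSCALED */
                                 : (is_uint ? 0x4u : 0x5u); /* UINT : SINT */
        return (dword1 & ~(0xfu << 26)) | (nfmt << 26);
    }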
- */ - if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { - LLVMValueRef tmp, tmp2; - - /* workaround 8/8/8/8 uint/sint cube gather bug */ - /* first detect it then change to a scaled read and f2i */ - tmp = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, ""); - tmp2 = tmp; - - /* extract the DATA_FORMAT */ - tmp = ac_build_bfe(ctx, tmp, LLVMConstInt(ctx->i32, 20, false), - LLVMConstInt(ctx->i32, 6, false), false); - - /* is the DATA_FORMAT == 8_8_8_8 */ - compare_cube_wa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tmp, LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), ""); - - if (stype == GLSL_TYPE_UINT) - /* Create a NUM FORMAT - 0x2 or 0x4 - USCALED or UINT */ - tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0x8000000, false), - LLVMConstInt(ctx->i32, 0x10000000, false), ""); - else - /* Create a NUM FORMAT - 0x3 or 0x5 - SSCALED or SINT */ - tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0xc000000, false), - LLVMConstInt(ctx->i32, 0x14000000, false), ""); - - /* replace the NUM FORMAT in the descriptor */ - tmp2 = LLVMBuildAnd(ctx->builder, tmp2, LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false), ""); - tmp2 = LLVMBuildOr(ctx->builder, tmp2, tmp, ""); - - args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32_1, ""); - - /* don't modify the coordinates for this case */ - coord = LLVMBuildSelect(ctx->builder, compare_cube_wa, orig_coords, coord, ""); - } - args->addr = coord; - result = ac_build_image_opcode(ctx, args); - - if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { - LLVMValueRef tmp, tmp2; - - /* if the cube workaround is in place, f2i the result. */ - for (c = 0; c < 4; c++) { - tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), ""); - if (stype == GLSL_TYPE_UINT) - tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, ""); - else - tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, ""); - tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, ""); - tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, ""); - tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, tmp2, tmp, ""); - tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, ""); - result = LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), ""); - } - } - return result; -} - -static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, - const nir_tex_instr *instr, - bool lod_is_zero, - struct ac_image_args *args) -{ - if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { - unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa); - - return ac_build_buffer_load_format(&ctx->ac, - args->resource, - args->addr, - ctx->ac.i32_0, - util_last_bit(mask), - false, true); - } - - args->opcode = ac_image_sample; - args->compare = instr->is_shadow; - - switch (instr->op) { - case nir_texop_txf: - case nir_texop_txf_ms: - case nir_texop_samples_identical: - args->opcode = lod_is_zero || - instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? 
- ac_image_load : ac_image_load_mip; - args->compare = false; - args->offset = false; - break; - case nir_texop_txb: - args->bias = true; - break; - case nir_texop_txl: - if (lod_is_zero) - args->level_zero = true; - else - args->lod = true; - break; - case nir_texop_txs: - case nir_texop_query_levels: - args->opcode = ac_image_get_resinfo; - break; - case nir_texop_tex: - if (ctx->stage != MESA_SHADER_FRAGMENT) - args->level_zero = true; - break; - case nir_texop_txd: - args->deriv = true; - break; - case nir_texop_tg4: - args->opcode = ac_image_gather4; - args->level_zero = true; - break; - case nir_texop_lod: - args->opcode = ac_image_get_lod; - args->compare = false; - args->offset = false; - break; - default: - break; - } - - if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= VI) { - enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type); - if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) { - return radv_lower_gather4_integer(&ctx->ac, args, instr); - } - } - return ac_build_image_opcode(&ctx->ac, args); -} - -static LLVMValueRef -radv_load_resource(struct ac_shader_abi *abi, LLVMValueRef index, - unsigned desc_set, unsigned binding) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set]; - struct radv_pipeline_layout *pipeline_layout = ctx->options->layout; - struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout; - unsigned base_offset = layout->binding[binding].offset; - LLVMValueRef offset, stride; - - if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || - layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) { - unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start + - layout->binding[binding].dynamic_offset_offset; - desc_ptr = ctx->abi.push_constants; - base_offset = pipeline_layout->push_constant_size + 16 * idx; - stride = LLVMConstInt(ctx->ac.i32, 16, false); - } else - stride = LLVMConstInt(ctx->ac.i32, layout->binding[binding].size, false); - - offset = LLVMConstInt(ctx->ac.i32, base_offset, false); - index = LLVMBuildMul(ctx->ac.builder, index, stride, ""); - offset = LLVMBuildAdd(ctx->ac.builder, offset, index, ""); - - desc_ptr = ac_build_gep0(&ctx->ac, desc_ptr, offset); - desc_ptr = ac_cast_ptr(&ctx->ac, desc_ptr, ctx->ac.v4i32); - LLVMSetMetadata(desc_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md); - - return desc_ptr; -} - -static LLVMValueRef visit_vulkan_resource_reindex(struct ac_nir_context *ctx, - nir_intrinsic_instr *instr) -{ - LLVMValueRef ptr = get_src(ctx, instr->src[0]); - LLVMValueRef index = get_src(ctx, instr->src[1]); - - LLVMValueRef result = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, ""); - LLVMSetMetadata(result, ctx->ac.uniform_md_kind, ctx->ac.empty_md); - return result; -} - -static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx, - nir_intrinsic_instr *instr) -{ - LLVMValueRef ptr, addr; - - addr = LLVMConstInt(ctx->ac.i32, nir_intrinsic_base(instr), 0); - addr = LLVMBuildAdd(ctx->ac.builder, addr, - get_src(ctx, instr->src[0]), ""); - - ptr = ac_build_gep0(&ctx->ac, ctx->abi->push_constants, addr); - ptr = ac_cast_ptr(&ctx->ac, ptr, get_def_type(ctx, &instr->dest.ssa)); - - return LLVMBuildLoad(ctx->ac.builder, ptr, ""); -} - -static LLVMValueRef visit_get_buffer_size(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr) -{ - LLVMValueRef index = get_src(ctx, instr->src[0]); - - return get_buffer_size(ctx, 
ctx->abi->load_ssbo(ctx->abi, index, false), false); -} - -static uint32_t widen_mask(uint32_t mask, unsigned multiplier) -{ - uint32_t new_mask = 0; - for(unsigned i = 0; i < 32 && (1u << i) <= mask; ++i) - if (mask & (1u << i)) - new_mask |= ((1u << multiplier) - 1u) << (i * multiplier); - return new_mask; -} - -static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src, - unsigned start, unsigned count) -{ - LLVMTypeRef type = LLVMTypeOf(src); - - if (LLVMGetTypeKind(type) != LLVMVectorTypeKind) { - assert(start == 0); - assert(count == 1); - return src; - } - - unsigned src_elements = LLVMGetVectorSize(type); - assert(start < src_elements); - assert(start + count <= src_elements); - - if (start == 0 && count == src_elements) - return src; - - if (count == 1) - return LLVMBuildExtractElement(ctx->builder, src, LLVMConstInt(ctx->i32, start, false), ""); - - assert(count <= 8); - LLVMValueRef indices[8]; - for (unsigned i = 0; i < count; ++i) - indices[i] = LLVMConstInt(ctx->i32, start + i, false); - - LLVMValueRef swizzle = LLVMConstVector(indices, count); - return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, ""); -} - -static void visit_store_ssbo(struct ac_nir_context *ctx, - nir_intrinsic_instr *instr) -{ - const char *store_name; - LLVMValueRef src_data = get_src(ctx, instr->src[0]); - LLVMTypeRef data_type = ctx->ac.f32; - int elem_size_mult = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 32; - int components_32bit = elem_size_mult * instr->num_components; - unsigned writemask = nir_intrinsic_write_mask(instr); - LLVMValueRef base_data, base_offset; - LLVMValueRef params[6]; - - params[1] = ctx->abi->load_ssbo(ctx->abi, - get_src(ctx, instr->src[1]), true); - params[2] = ctx->ac.i32_0; /* vindex */ - params[4] = ctx->ac.i1false; /* glc */ - params[5] = ctx->ac.i1false; /* slc */ - - if (components_32bit > 1) - data_type = LLVMVectorType(ctx->ac.f32, components_32bit); - - writemask = widen_mask(writemask, elem_size_mult); - - base_data = ac_to_float(&ctx->ac, src_data); - base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components); - base_data = LLVMBuildBitCast(ctx->ac.builder, base_data, - data_type, ""); - base_offset = get_src(ctx, instr->src[2]); /* voffset */ - while (writemask) { - int start, count; - LLVMValueRef data; - LLVMValueRef offset; - - u_bit_scan_consecutive_range(&writemask, &start, &count); - - /* Due to an LLVM limitation, split 3-element writes - * into a 2-element and a 1-element write. 
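The splitting described here, together with the count > 4 case handled just below, can be shown in isolation as a hypothetical helper; the tail bits are pushed back into the writemask so a later loop iteration picks them up:

    #include <stdint.h>

    /* Clamp a consecutive run of 'count' dwords starting at 'start' to a
     * legal store size, re-queuing the remainder in the writemask. */
    static int clamp_store_count(uint32_t *writemask, int start, int count)
    {
        if (count == 3) {              /* split 3 -> 2 + 1 */
            *writemask |= 1u << (start + 2);
            count = 2;
        }
        if (count > 4) {               /* split >4 -> 4 + rest */
            *writemask |= ((1u << (count - 4)) - 1u) << (start + 4);
            count = 4;
        }
        return count;
    }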
*/ - if (count == 3) { - writemask |= 1 << (start + 2); - count = 2; - } - - if (count > 4) { - writemask |= ((1u << (count - 4)) - 1u) << (start + 4); - count = 4; - } - - if (count == 4) { - store_name = "llvm.amdgcn.buffer.store.v4f32"; - } else if (count == 2) { - store_name = "llvm.amdgcn.buffer.store.v2f32"; - - } else { - assert(count == 1); - store_name = "llvm.amdgcn.buffer.store.f32"; - } - data = extract_vector_range(&ctx->ac, base_data, start, count); - - offset = base_offset; - if (start != 0) { - offset = LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, start * 4, false), ""); - } - params[0] = data; - params[3] = offset; - ac_build_intrinsic(&ctx->ac, store_name, - ctx->ac.voidt, params, 6, 0); - } -} - -static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr) -{ - const char *name; - LLVMValueRef params[6]; - int arg_count = 0; - - if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) { - params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0); - } - params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0); - params[arg_count++] = ctx->abi->load_ssbo(ctx->abi, - get_src(ctx, instr->src[0]), - true); - params[arg_count++] = ctx->ac.i32_0; /* vindex */ - params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */ - params[arg_count++] = LLVMConstInt(ctx->ac.i1, 0, false); /* slc */ - - switch (instr->intrinsic) { - case nir_intrinsic_ssbo_atomic_add: - name = "llvm.amdgcn.buffer.atomic.add"; - break; - case nir_intrinsic_ssbo_atomic_imin: - name = "llvm.amdgcn.buffer.atomic.smin"; - break; - case nir_intrinsic_ssbo_atomic_umin: - name = "llvm.amdgcn.buffer.atomic.umin"; - break; - case nir_intrinsic_ssbo_atomic_imax: - name = "llvm.amdgcn.buffer.atomic.smax"; - break; - case nir_intrinsic_ssbo_atomic_umax: - name = "llvm.amdgcn.buffer.atomic.umax"; - break; - case nir_intrinsic_ssbo_atomic_and: - name = "llvm.amdgcn.buffer.atomic.and"; - break; - case nir_intrinsic_ssbo_atomic_or: - name = "llvm.amdgcn.buffer.atomic.or"; - break; - case nir_intrinsic_ssbo_atomic_xor: - name = "llvm.amdgcn.buffer.atomic.xor"; - break; - case nir_intrinsic_ssbo_atomic_exchange: - name = "llvm.amdgcn.buffer.atomic.swap"; - break; - case nir_intrinsic_ssbo_atomic_comp_swap: - name = "llvm.amdgcn.buffer.atomic.cmpswap"; - break; - default: - abort(); - } - - return ac_build_intrinsic(&ctx->ac, name, ctx->ac.i32, params, arg_count, 0); -} - -static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr) -{ - LLVMValueRef results[2]; - int load_components; - int num_components = instr->num_components; - if (instr->dest.ssa.bit_size == 64) - num_components *= 2; - - for (int i = 0; i < num_components; i += load_components) { - load_components = MIN2(num_components - i, 4); - const char *load_name; - LLVMTypeRef data_type = ctx->ac.f32; - LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * 4, false); - offset = LLVMBuildAdd(ctx->ac.builder, get_src(ctx, instr->src[1]), offset, ""); - - if (load_components == 3) - data_type = LLVMVectorType(ctx->ac.f32, 4); - else if (load_components > 1) - data_type = LLVMVectorType(ctx->ac.f32, load_components); - - if (load_components >= 3) - load_name = "llvm.amdgcn.buffer.load.v4f32"; - else if (load_components == 2) - load_name = "llvm.amdgcn.buffer.load.v2f32"; - else if (load_components == 1) - load_name = "llvm.amdgcn.buffer.load.f32"; - else - unreachable("unhandled number of components"); - - 
LLVMValueRef params[] = { - ctx->abi->load_ssbo(ctx->abi, - get_src(ctx, instr->src[0]), - false), - ctx->ac.i32_0, - offset, - ctx->ac.i1false, - ctx->ac.i1false, - }; - - results[i > 0 ? 1 : 0] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0); - } - - assume(results[0]); - LLVMValueRef ret = results[0]; - if (num_components > 4 || num_components == 3) { - LLVMValueRef masks[] = { - LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false), - LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false), - LLVMConstInt(ctx->ac.i32, 4, false), LLVMConstInt(ctx->ac.i32, 5, false), - LLVMConstInt(ctx->ac.i32, 6, false), LLVMConstInt(ctx->ac.i32, 7, false) - }; - - LLVMValueRef swizzle = LLVMConstVector(masks, num_components); - ret = LLVMBuildShuffleVector(ctx->ac.builder, results[0], - results[num_components > 4 ? 1 : 0], swizzle, ""); - } - - return LLVMBuildBitCast(ctx->ac.builder, ret, - get_def_type(ctx, &instr->dest.ssa), ""); -} - -static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr) -{ - LLVMValueRef ret; - LLVMValueRef rsrc = get_src(ctx, instr->src[0]); - LLVMValueRef offset = get_src(ctx, instr->src[1]); - int num_components = instr->num_components; - - if (ctx->abi->load_ubo) - rsrc = ctx->abi->load_ubo(ctx->abi, rsrc); - - if (instr->dest.ssa.bit_size == 64) - num_components *= 2; - - ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, - NULL, 0, false, false, true, true); - ret = ac_trim_vector(&ctx->ac, ret, num_components); - return LLVMBuildBitCast(ctx->ac.builder, ret, - get_def_type(ctx, &instr->dest.ssa), ""); -} - -static void -get_deref_offset(struct ac_nir_context *ctx, nir_deref_var *deref, - bool vs_in, unsigned *vertex_index_out, - LLVMValueRef *vertex_index_ref, - unsigned *const_out, LLVMValueRef *indir_out) -{ - unsigned const_offset = 0; - nir_deref *tail = &deref->deref; - LLVMValueRef offset = NULL; - - if (vertex_index_out != NULL || vertex_index_ref != NULL) { - tail = tail->child; - nir_deref_array *deref_array = nir_deref_as_array(tail); - if (vertex_index_out) - *vertex_index_out = deref_array->base_offset; - - if (vertex_index_ref) { - LLVMValueRef vtx = LLVMConstInt(ctx->ac.i32, deref_array->base_offset, false); - if (deref_array->deref_array_type == nir_deref_array_type_indirect) { - vtx = LLVMBuildAdd(ctx->ac.builder, vtx, get_src(ctx, deref_array->indirect), ""); - } - *vertex_index_ref = vtx; - } - } - - if (deref->var->data.compact) { - assert(tail->child->deref_type == nir_deref_type_array); - assert(glsl_type_is_scalar(glsl_without_array(deref->var->type))); - nir_deref_array *deref_array = nir_deref_as_array(tail->child); - /* We always lower indirect dereferences for "compact" array vars. 
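The offset accumulation this function performs can be pictured as a flattened walk over the deref chain (the struct and names here are illustrative, not part of the code): each array step contributes size * base_offset to a compile-time constant, plus size * index to a dynamic offset when the index is indirect:

    #include <stdint.h>

    struct deref_step {
        uint32_t slot_size;   /* attribute slots occupied by one element */
        uint32_t base_offset; /* compile-time index */
        uint32_t dyn_index;   /* runtime index, used when is_indirect */
        int is_indirect;
    };

    static uint32_t walk_deref(const struct deref_step *steps, int n,
                               uint32_t *dyn_out)
    {
        uint32_t const_offset = 0, dyn = 0;
        for (int i = 0; i < n; i++) {
            const_offset += steps[i].slot_size * steps[i].base_offset;
            if (steps[i].is_indirect)
                dyn += steps[i].slot_size * steps[i].dyn_index;
        }
        *dyn_out = dyn;
        return const_offset;
    }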
*/ - assert(deref_array->deref_array_type == nir_deref_array_type_direct); - - const_offset = deref_array->base_offset; - goto out; - } - - while (tail->child != NULL) { - const struct glsl_type *parent_type = tail->type; - tail = tail->child; - - if (tail->deref_type == nir_deref_type_array) { - nir_deref_array *deref_array = nir_deref_as_array(tail); - LLVMValueRef index, stride, local_offset; - unsigned size = glsl_count_attribute_slots(tail->type, vs_in); - - const_offset += size * deref_array->base_offset; - if (deref_array->deref_array_type == nir_deref_array_type_direct) - continue; - - assert(deref_array->deref_array_type == nir_deref_array_type_indirect); - index = get_src(ctx, deref_array->indirect); - stride = LLVMConstInt(ctx->ac.i32, size, 0); - local_offset = LLVMBuildMul(ctx->ac.builder, stride, index, ""); - - if (offset) - offset = LLVMBuildAdd(ctx->ac.builder, offset, local_offset, ""); - else - offset = local_offset; - } else if (tail->deref_type == nir_deref_type_struct) { - nir_deref_struct *deref_struct = nir_deref_as_struct(tail); - - for (unsigned i = 0; i < deref_struct->index; i++) { - const struct glsl_type *ft = glsl_get_struct_field(parent_type, i); - const_offset += glsl_count_attribute_slots(ft, vs_in); - } - } else - unreachable("unsupported deref type"); - - } -out: - if (const_offset && offset) - offset = LLVMBuildAdd(ctx->ac.builder, offset, - LLVMConstInt(ctx->ac.i32, const_offset, 0), - ""); - - *const_out = const_offset; - *indir_out = offset; -} - - -/* The offchip buffer layout for TCS->TES is - * - * - attribute 0 of patch 0 vertex 0 - * - attribute 0 of patch 0 vertex 1 - * - attribute 0 of patch 0 vertex 2 - * ... - * - attribute 0 of patch 1 vertex 0 - * - attribute 0 of patch 1 vertex 1 - * ... - * - attribute 1 of patch 0 vertex 0 - * - attribute 1 of patch 0 vertex 1 - * ... - * - per patch attribute 0 of patch 0 - * - per patch attribute 0 of patch 1 - * ... - * - * Note that every attribute has 4 components. 
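The per-vertex addressing this layout implies, written as plain arithmetic (each attribute slot is 4 dwords, i.e. 16 bytes; this mirrors get_tcs_tes_buffer_address() below):

    #include <stdint.h>

    static uint32_t tcs_tes_vertex_attr_offset(uint32_t rel_patch_id,
                                               uint32_t vertex_index,
                                               uint32_t param_index,
                                               uint32_t verts_per_patch,
                                               uint32_t num_patches)
    {
        uint32_t vertex = rel_patch_id * verts_per_patch + vertex_index;
        uint32_t stride = verts_per_patch * num_patches; /* vertices per attribute */
        return (vertex + param_index * stride) * 16;
    }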
- */ -static LLVMValueRef get_tcs_tes_buffer_address(struct radv_shader_context *ctx, - LLVMValueRef vertex_index, - LLVMValueRef param_index) -{ - LLVMValueRef base_addr, vertices_per_patch, num_patches; - LLVMValueRef param_stride, constant16; - LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); - - vertices_per_patch = LLVMConstInt(ctx->ac.i32, ctx->tcs_vertices_per_patch, false); - num_patches = ac_unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 0, 9); - - constant16 = LLVMConstInt(ctx->ac.i32, 16, false); - if (vertex_index) { - base_addr = LLVMBuildMul(ctx->ac.builder, rel_patch_id, - vertices_per_patch, ""); - - base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, - vertex_index, ""); - - param_stride = LLVMBuildMul(ctx->ac.builder, vertices_per_patch, - num_patches, ""); - } else { - base_addr = rel_patch_id; - param_stride = num_patches; - } - - base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, - LLVMBuildMul(ctx->ac.builder, param_index, - param_stride, ""), ""); - - base_addr = LLVMBuildMul(ctx->ac.builder, base_addr, constant16, ""); - - if (!vertex_index) { - LLVMValueRef patch_data_offset = - ac_unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 16, 16); - - base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, - patch_data_offset, ""); - } - return base_addr; -} - -static LLVMValueRef get_tcs_tes_buffer_address_params(struct radv_shader_context *ctx, - unsigned param, - unsigned const_index, - bool is_compact, - LLVMValueRef vertex_index, - LLVMValueRef indir_index) -{ - LLVMValueRef param_index; - - if (indir_index) - param_index = LLVMBuildAdd(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, param, false), - indir_index, ""); - else { - if (const_index && !is_compact) - param += const_index; - param_index = LLVMConstInt(ctx->ac.i32, param, false); - } - return get_tcs_tes_buffer_address(ctx, vertex_index, param_index); -} - -static void -mark_tess_output(struct radv_shader_context *ctx, - bool is_patch, uint32_t param) - -{ - if (is_patch) { - ctx->tess_patch_outputs_written |= (1ull << param); - } else - ctx->tess_outputs_written |= (1ull << param); -} - -static LLVMValueRef -get_dw_address(struct radv_shader_context *ctx, - LLVMValueRef dw_addr, - unsigned param, - unsigned const_index, - bool compact_const_index, - LLVMValueRef vertex_index, - LLVMValueRef stride, - LLVMValueRef indir_index) - -{ - - if (vertex_index) { - dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, - LLVMBuildMul(ctx->ac.builder, - vertex_index, - stride, ""), ""); - } - - if (indir_index) - dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, - LLVMBuildMul(ctx->ac.builder, indir_index, - LLVMConstInt(ctx->ac.i32, 4, false), ""), ""); - else if (const_index && !compact_const_index) - dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, - LLVMConstInt(ctx->ac.i32, const_index, false), ""); - - dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, - LLVMConstInt(ctx->ac.i32, param * 4, false), ""); - - if (const_index && compact_const_index) - dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, - LLVMConstInt(ctx->ac.i32, const_index, false), ""); - return dw_addr; -} - -static LLVMValueRef -load_tcs_varyings(struct ac_shader_abi *abi, - LLVMTypeRef type, - LLVMValueRef vertex_index, - LLVMValueRef indir_index, - unsigned const_index, - unsigned location, - unsigned driver_location, - unsigned component, - unsigned num_components, - bool is_patch, - bool is_compact, - bool load_input) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - LLVMValueRef dw_addr, stride; - LLVMValueRef value[4], result; 
- unsigned param = shader_io_get_unique_index(location); - - if (load_input) { - stride = ac_unpack_param(&ctx->ac, ctx->tcs_in_layout, 13, 8); - dw_addr = get_tcs_in_current_patch_offset(ctx); - } else { - if (!is_patch) { - stride = get_tcs_out_vertex_stride(ctx); - dw_addr = get_tcs_out_current_patch_offset(ctx); - } else { - dw_addr = get_tcs_out_current_patch_data_offset(ctx); - stride = NULL; - } - } - - dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride, - indir_index); - - for (unsigned i = 0; i < num_components + component; i++) { - value[i] = ac_lds_load(&ctx->ac, dw_addr); - dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, - ctx->ac.i32_1, ""); - } - result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component); - return result; -} - -static void -store_tcs_output(struct ac_shader_abi *abi, - LLVMValueRef vertex_index, - LLVMValueRef param_index, - unsigned const_index, - unsigned location, - unsigned driver_location, - LLVMValueRef src, - unsigned component, - bool is_patch, - bool is_compact, - unsigned writemask) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - LLVMValueRef dw_addr; - LLVMValueRef stride = NULL; - LLVMValueRef buf_addr = NULL; - unsigned param; - bool store_lds = true; - - if (is_patch) { - if (!(ctx->tcs_patch_outputs_read & (1U << (location - VARYING_SLOT_PATCH0)))) - store_lds = false; - } else { - if (!(ctx->tcs_outputs_read & (1ULL << location))) - store_lds = false; - } - - param = shader_io_get_unique_index(location); - if (location == VARYING_SLOT_CLIP_DIST0 && - is_compact && const_index > 3) { - const_index -= 3; - param++; - } - - if (!is_patch) { - stride = get_tcs_out_vertex_stride(ctx); - dw_addr = get_tcs_out_current_patch_offset(ctx); - } else { - dw_addr = get_tcs_out_current_patch_data_offset(ctx); - } - - mark_tess_output(ctx, is_patch, param); - - dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride, - param_index); - buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, is_compact, - vertex_index, param_index); - - bool is_tess_factor = false; - if (location == VARYING_SLOT_TESS_LEVEL_INNER || - location == VARYING_SLOT_TESS_LEVEL_OUTER) - is_tess_factor = true; - - unsigned base = is_compact ? 
const_index : 0; - for (unsigned chan = 0; chan < 8; chan++) { - if (!(writemask & (1 << chan))) - continue; - LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component); - - if (store_lds || is_tess_factor) { - LLVMValueRef dw_addr_chan = - LLVMBuildAdd(ctx->ac.builder, dw_addr, - LLVMConstInt(ctx->ac.i32, chan, false), ""); - ac_lds_store(&ctx->ac, dw_addr_chan, value); - } - - if (!is_tess_factor && writemask != 0xF) - ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1, - buf_addr, ctx->oc_lds, - 4 * (base + chan), 1, 0, true, false); - } - - if (writemask == 0xF) { - ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, src, 4, - buf_addr, ctx->oc_lds, - (base * 4), 1, 0, true, false); - } -} - -static LLVMValueRef -load_tes_input(struct ac_shader_abi *abi, - LLVMTypeRef type, - LLVMValueRef vertex_index, - LLVMValueRef param_index, - unsigned const_index, - unsigned location, - unsigned driver_location, - unsigned component, - unsigned num_components, - bool is_patch, - bool is_compact, - bool load_input) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - LLVMValueRef buf_addr; - LLVMValueRef result; - unsigned param = shader_io_get_unique_index(location); - - if (location == VARYING_SLOT_CLIP_DIST0 && is_compact && const_index > 3) { - const_index -= 3; - param++; - } - - buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, - is_compact, vertex_index, param_index); - - LLVMValueRef comp_offset = LLVMConstInt(ctx->ac.i32, component * 4, false); - buf_addr = LLVMBuildAdd(ctx->ac.builder, buf_addr, comp_offset, ""); - - result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, num_components, NULL, - buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false); - result = ac_trim_vector(&ctx->ac, result, num_components); - return result; -} - -static LLVMValueRef -load_gs_input(struct ac_shader_abi *abi, - unsigned location, - unsigned driver_location, - unsigned component, - unsigned num_components, - unsigned vertex_index, - unsigned const_index, - LLVMTypeRef type) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - LLVMValueRef vtx_offset; - unsigned param, vtx_offset_param; - LLVMValueRef value[4], result; - - vtx_offset_param = vertex_index; - assert(vtx_offset_param < 6); - vtx_offset = LLVMBuildMul(ctx->ac.builder, ctx->gs_vtx_offset[vtx_offset_param], - LLVMConstInt(ctx->ac.i32, 4, false), ""); - - param = shader_io_get_unique_index(location); - - for (unsigned i = component; i < num_components + component; i++) { - if (ctx->ac.chip_class >= GFX9) { - LLVMValueRef dw_addr = ctx->gs_vtx_offset[vtx_offset_param]; - dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, - LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), ""); - value[i] = ac_lds_load(&ctx->ac, dw_addr); - } else { - LLVMValueRef soffset = - LLVMConstInt(ctx->ac.i32, - (param * 4 + i + const_index) * 256, - false); - - value[i] = ac_build_buffer_load(&ctx->ac, - ctx->esgs_ring, 1, - ctx->ac.i32_0, - vtx_offset, soffset, - 0, 1, 0, true, false); - - value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], - type, ""); - } - } - result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component); - result = ac_to_integer(&ctx->ac, result); - return result; -} - -static LLVMValueRef -build_gep_for_deref(struct ac_nir_context *ctx, - nir_deref_var *deref) -{ - struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, deref->var); - assert(entry->data); - 
LLVMValueRef val = entry->data; - nir_deref *tail = deref->deref.child; - while (tail != NULL) { - LLVMValueRef offset; - switch (tail->deref_type) { - case nir_deref_type_array: { - nir_deref_array *array = nir_deref_as_array(tail); - offset = LLVMConstInt(ctx->ac.i32, array->base_offset, 0); - if (array->deref_array_type == - nir_deref_array_type_indirect) { - offset = LLVMBuildAdd(ctx->ac.builder, offset, - get_src(ctx, - array->indirect), - ""); - } - break; - } - case nir_deref_type_struct: { - nir_deref_struct *deref_struct = - nir_deref_as_struct(tail); - offset = LLVMConstInt(ctx->ac.i32, - deref_struct->index, 0); - break; - } - default: - unreachable("bad deref type"); - } - val = ac_build_gep0(&ctx->ac, val, offset); - tail = tail->child; - } - return val; -} - -static LLVMValueRef load_tess_varyings(struct ac_nir_context *ctx, - nir_intrinsic_instr *instr, - bool load_inputs) -{ - LLVMValueRef result; - LLVMValueRef vertex_index = NULL; - LLVMValueRef indir_index = NULL; - unsigned const_index = 0; - unsigned location = instr->variables[0]->var->data.location; - unsigned driver_location = instr->variables[0]->var->data.driver_location; - const bool is_patch = instr->variables[0]->var->data.patch; - const bool is_compact = instr->variables[0]->var->data.compact; - - get_deref_offset(ctx, instr->variables[0], - false, NULL, is_patch ? NULL : &vertex_index, - &const_index, &indir_index); - - LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa); - - LLVMTypeRef src_component_type; - if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind) - src_component_type = LLVMGetElementType(dest_type); - else - src_component_type = dest_type; - - result = ctx->abi->load_tess_varyings(ctx->abi, src_component_type, - vertex_index, indir_index, - const_index, location, driver_location, - instr->variables[0]->var->data.location_frac, - instr->num_components, - is_patch, is_compact, load_inputs); - return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, ""); -} - -static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, - nir_intrinsic_instr *instr) -{ - LLVMValueRef values[8]; - int idx = instr->variables[0]->var->data.driver_location; - int ve = instr->dest.ssa.num_components; - unsigned comp = instr->variables[0]->var->data.location_frac; - LLVMValueRef indir_index; - LLVMValueRef ret; - unsigned const_index; - unsigned stride = instr->variables[0]->var->data.compact ? 
1 : 4; - bool vs_in = ctx->stage == MESA_SHADER_VERTEX && - instr->variables[0]->var->data.mode == nir_var_shader_in; - get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL, - &const_index, &indir_index); - - if (instr->dest.ssa.bit_size == 64) - ve *= 2; - - switch (instr->variables[0]->var->data.mode) { - case nir_var_shader_in: - if (ctx->stage == MESA_SHADER_TESS_CTRL || - ctx->stage == MESA_SHADER_TESS_EVAL) { - return load_tess_varyings(ctx, instr, true); - } - - if (ctx->stage == MESA_SHADER_GEOMETRY) { - LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size); - LLVMValueRef indir_index; - unsigned const_index, vertex_index; - get_deref_offset(ctx, instr->variables[0], - false, &vertex_index, NULL, - &const_index, &indir_index); - - return ctx->abi->load_inputs(ctx->abi, instr->variables[0]->var->data.location, - instr->variables[0]->var->data.driver_location, - instr->variables[0]->var->data.location_frac, - instr->num_components, vertex_index, const_index, type); - } - - for (unsigned chan = comp; chan < ve + comp; chan++) { - if (indir_index) { - unsigned count = glsl_count_attribute_slots( - instr->variables[0]->var->type, - ctx->stage == MESA_SHADER_VERTEX); - count -= chan / 4; - LLVMValueRef tmp_vec = ac_build_gather_values_extended( - &ctx->ac, ctx->abi->inputs + idx + chan, count, - stride, false, true); - - values[chan] = LLVMBuildExtractElement(ctx->ac.builder, - tmp_vec, - indir_index, ""); - } else - values[chan] = ctx->abi->inputs[idx + chan + const_index * stride]; - } + case nir_op_fsign: + src[0] = ac_to_float(&ctx->ac, src[0]); + result = ac_build_fsign(&ctx->ac, src[0], + instr->dest.dest.ssa.bit_size); break; - case nir_var_local: - for (unsigned chan = 0; chan < ve; chan++) { - if (indir_index) { - unsigned count = glsl_count_attribute_slots( - instr->variables[0]->var->type, false); - count -= chan / 4; - LLVMValueRef tmp_vec = ac_build_gather_values_extended( - &ctx->ac, ctx->locals + idx + chan, count, - stride, true, true); - - values[chan] = LLVMBuildExtractElement(ctx->ac.builder, - tmp_vec, - indir_index, ""); - } else { - values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], ""); - } - } + case nir_op_ffloor: + result = emit_intrin_1f_param(&ctx->ac, "llvm.floor", + ac_to_float_type(&ctx->ac, def_type), src[0]); break; - case nir_var_shared: { - LLVMValueRef address = build_gep_for_deref(ctx, - instr->variables[0]); - LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, ""); - return LLVMBuildBitCast(ctx->ac.builder, val, - get_def_type(ctx, &instr->dest.ssa), - ""); - } - case nir_var_shader_out: - if (ctx->stage == MESA_SHADER_TESS_CTRL) { - return load_tess_varyings(ctx, instr, false); - } - - for (unsigned chan = comp; chan < ve + comp; chan++) { - if (indir_index) { - unsigned count = glsl_count_attribute_slots( - instr->variables[0]->var->type, false); - count -= chan / 4; - LLVMValueRef tmp_vec = ac_build_gather_values_extended( - &ctx->ac, ctx->abi->outputs + idx + chan, count, - stride, true, true); - - values[chan] = LLVMBuildExtractElement(ctx->ac.builder, - tmp_vec, - indir_index, ""); - } else { - values[chan] = LLVMBuildLoad(ctx->ac.builder, - ctx->abi->outputs[idx + chan + const_index * stride], - ""); - } - } + case nir_op_ftrunc: + result = emit_intrin_1f_param(&ctx->ac, "llvm.trunc", + ac_to_float_type(&ctx->ac, def_type), src[0]); break; - default: - unreachable("unhandled variable mode"); - } - ret = ac_build_varying_gather_values(&ctx->ac, values, ve,
comp); - return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), ""); -} - -static void -visit_store_var(struct ac_nir_context *ctx, - nir_intrinsic_instr *instr) -{ - LLVMValueRef temp_ptr, value; - int idx = instr->variables[0]->var->data.driver_location; - unsigned comp = instr->variables[0]->var->data.location_frac; - LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0])); - int writemask = instr->const_index[0] << comp; - LLVMValueRef indir_index; - unsigned const_index; - get_deref_offset(ctx, instr->variables[0], false, - NULL, NULL, &const_index, &indir_index); - - if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) { - - src = LLVMBuildBitCast(ctx->ac.builder, src, - LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2), - ""); - - writemask = widen_mask(writemask, 2); - } - - switch (instr->variables[0]->var->data.mode) { - case nir_var_shader_out: - - if (ctx->stage == MESA_SHADER_TESS_CTRL) { - LLVMValueRef vertex_index = NULL; - LLVMValueRef indir_index = NULL; - unsigned const_index = 0; - const unsigned location = instr->variables[0]->var->data.location; - const unsigned driver_location = instr->variables[0]->var->data.driver_location; - const unsigned comp = instr->variables[0]->var->data.location_frac; - const bool is_patch = instr->variables[0]->var->data.patch; - const bool is_compact = instr->variables[0]->var->data.compact; - - get_deref_offset(ctx, instr->variables[0], - false, NULL, is_patch ? NULL : &vertex_index, - &const_index, &indir_index); - - ctx->abi->store_tcs_outputs(ctx->abi, vertex_index, indir_index, - const_index, location, driver_location, - src, comp, is_patch, is_compact, writemask); - return; - } - - for (unsigned chan = 0; chan < 8; chan++) { - int stride = 4; - if (!(writemask & (1 << chan))) - continue; - - value = ac_llvm_extract_elem(&ctx->ac, src, chan - comp); - - if (instr->variables[0]->var->data.compact) - stride = 1; - if (indir_index) { - unsigned count = glsl_count_attribute_slots( - instr->variables[0]->var->type, false); - count -= chan / 4; - LLVMValueRef tmp_vec = ac_build_gather_values_extended( - &ctx->ac, ctx->abi->outputs + idx + chan, count, - stride, true, true); - - tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec, - value, indir_index, ""); - build_store_values_extended(&ctx->ac, ctx->abi->outputs + idx + chan, - count, stride, tmp_vec); - - } else { - temp_ptr = ctx->abi->outputs[idx + chan + const_index * stride]; - - LLVMBuildStore(ctx->ac.builder, value, temp_ptr); - } - } + case nir_op_fceil: + result = emit_intrin_1f_param(&ctx->ac, "llvm.ceil", + ac_to_float_type(&ctx->ac, def_type), src[0]); break; - case nir_var_local: - for (unsigned chan = 0; chan < 8; chan++) { - if (!(writemask & (1 << chan))) - continue; - - value = ac_llvm_extract_elem(&ctx->ac, src, chan); - if (indir_index) { - unsigned count = glsl_count_attribute_slots( - instr->variables[0]->var->type, false); - count -= chan / 4; - LLVMValueRef tmp_vec = ac_build_gather_values_extended( - &ctx->ac, ctx->locals + idx + chan, count, - 4, true, true); - - tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec, - value, indir_index, ""); - build_store_values_extended(&ctx->ac, ctx->locals + idx + chan, - count, 4, tmp_vec); - } else { - temp_ptr = ctx->locals[idx + chan + const_index * 4]; - - LLVMBuildStore(ctx->ac.builder, value, temp_ptr); - } - } + case nir_op_fround_even: + result = emit_intrin_1f_param(&ctx->ac, "llvm.rint", + ac_to_float_type(&ctx->ac, def_type),src[0]); break; - case 
nir_var_shared: { - int writemask = instr->const_index[0]; - LLVMValueRef address = build_gep_for_deref(ctx, - instr->variables[0]); - LLVMValueRef val = get_src(ctx, instr->src[0]); - unsigned components = - glsl_get_vector_elements( - nir_deref_tail(&instr->variables[0]->deref)->type); - if (writemask == (1 << components) - 1) { - val = LLVMBuildBitCast( - ctx->ac.builder, val, - LLVMGetElementType(LLVMTypeOf(address)), ""); - LLVMBuildStore(ctx->ac.builder, val, address); - } else { - for (unsigned chan = 0; chan < 4; chan++) { - if (!(writemask & (1 << chan))) - continue; - LLVMValueRef ptr = - LLVMBuildStructGEP(ctx->ac.builder, - address, chan, ""); - LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val, - chan); - src = LLVMBuildBitCast( - ctx->ac.builder, src, - LLVMGetElementType(LLVMTypeOf(ptr)), ""); - LLVMBuildStore(ctx->ac.builder, src, ptr); - } - } + case nir_op_ffract: + src[0] = ac_to_float(&ctx->ac, src[0]); + result = ac_build_fract(&ctx->ac, src[0], + instr->dest.dest.ssa.bit_size); break; - } - default: + case nir_op_fsin: + result = emit_intrin_1f_param(&ctx->ac, "llvm.sin", + ac_to_float_type(&ctx->ac, def_type), src[0]); break; - } -} - -static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array) -{ - switch (dim) { - case GLSL_SAMPLER_DIM_BUF: - return 1; - case GLSL_SAMPLER_DIM_1D: - return array ? 2 : 1; - case GLSL_SAMPLER_DIM_2D: - return array ? 3 : 2; - case GLSL_SAMPLER_DIM_MS: - return array ? 4 : 3; - case GLSL_SAMPLER_DIM_3D: - case GLSL_SAMPLER_DIM_CUBE: - return 3; - case GLSL_SAMPLER_DIM_RECT: - case GLSL_SAMPLER_DIM_SUBPASS: - return 2; - case GLSL_SAMPLER_DIM_SUBPASS_MS: - return 3; - default: + case nir_op_fcos: + result = emit_intrin_1f_param(&ctx->ac, "llvm.cos", + ac_to_float_type(&ctx->ac, def_type), src[0]); + break; + case nir_op_fsqrt: + result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", + ac_to_float_type(&ctx->ac, def_type), src[0]); + break; + case nir_op_fexp2: + result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2", + ac_to_float_type(&ctx->ac, def_type), src[0]); + break; + case nir_op_flog2: + result = emit_intrin_1f_param(&ctx->ac, "llvm.log2", + ac_to_float_type(&ctx->ac, def_type), src[0]); + break; + case nir_op_frsq: + result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", + ac_to_float_type(&ctx->ac, def_type), src[0]); + result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1, + result); + break; + case nir_op_fmax: + result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", + ac_to_float_type(&ctx->ac, def_type), src[0], src[1]); + if (ctx->ac.chip_class < GFX9 && + instr->dest.dest.ssa.bit_size == 32) { + /* Only pre-GFX9 chips do not flush denorms. */ + result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize", + ac_to_float_type(&ctx->ac, def_type), + result); + } break; - } - return 0; -} - -static bool -glsl_is_array_image(const struct glsl_type *type) -{ - const enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); - - if (glsl_sampler_type_is_array(type)) - return true; - - return dim == GLSL_SAMPLER_DIM_CUBE || - dim == GLSL_SAMPLER_DIM_3D || - dim == GLSL_SAMPLER_DIM_SUBPASS || - dim == GLSL_SAMPLER_DIM_SUBPASS_MS; -} - - -/* Adjust the sample index according to FMASK. - * - * For uncompressed MSAA surfaces, FMASK should return 0x76543210, - * which is the identity mapping. Each nibble says which physical sample - * should be fetched to get that sample. 
- * - * For example, 0x11111100 means there are only 2 samples stored and - * the second sample covers 3/4 of the pixel. When reading samples 0 - * and 1, return physical sample 0 (determined by the first two 0s - * in FMASK), otherwise return physical sample 1. - * - * The sample index should be adjusted as follows: - * sample_index = (fmask >> (sample_index * 4)) & 0xF; - */ -static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx, - LLVMValueRef coord_x, LLVMValueRef coord_y, - LLVMValueRef coord_z, - LLVMValueRef sample_index, - LLVMValueRef fmask_desc_ptr) -{ - LLVMValueRef fmask_load_address[4]; - LLVMValueRef res; - - fmask_load_address[0] = coord_x; - fmask_load_address[1] = coord_y; - if (coord_z) { - fmask_load_address[2] = coord_z; - fmask_load_address[3] = LLVMGetUndef(ctx->i32); - } - - struct ac_image_args args = {0}; - - args.opcode = ac_image_load; - args.da = coord_z ? true : false; - args.resource = fmask_desc_ptr; - args.dmask = 0xf; - args.addr = ac_build_gather_values(ctx, fmask_load_address, coord_z ? 4 : 2); - - res = ac_build_image_opcode(ctx, &args); - - res = ac_to_integer(ctx, res); - LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false); - LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false); - - LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder, - res, - ctx->i32_0, ""); - - LLVMValueRef sample_index4 = - LLVMBuildMul(ctx->builder, sample_index, four, ""); - LLVMValueRef shifted_fmask = - LLVMBuildLShr(ctx->builder, fmask, sample_index4, ""); - LLVMValueRef final_sample = - LLVMBuildAnd(ctx->builder, shifted_fmask, F, ""); - - /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK - * resource descriptor is 0 (invalid), - */ - LLVMValueRef fmask_desc = - LLVMBuildBitCast(ctx->builder, fmask_desc_ptr, - ctx->v8i32, ""); - - LLVMValueRef fmask_word1 = - LLVMBuildExtractElement(ctx->builder, fmask_desc, - ctx->i32_1, ""); - - LLVMValueRef word1_is_nonzero = - LLVMBuildICmp(ctx->builder, LLVMIntNE, - fmask_word1, ctx->i32_0, ""); - - /* Replace the MSAA sample index. 
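The remap stated above, restated as scalar code with the values from the comment's example (these reproduce the comment, not additional hardware behavior):

    #include <stdint.h>

    static unsigned fmask_remap_sample(uint32_t fmask, unsigned sample_index)
    {
        return (fmask >> (sample_index * 4)) & 0xF;
    }
    /* fmask_remap_sample(0x76543210, n) == n for n in 0..7 (identity);
     * with 0x11111100, samples 0..1 map to physical sample 0 and
     * samples 2..7 map to physical sample 1. */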
*/ - sample_index = - LLVMBuildSelect(ctx->builder, word1_is_nonzero, - final_sample, sample_index, ""); - return sample_index; -} - -static LLVMValueRef get_image_coords(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr) -{ - const struct glsl_type *type = glsl_without_array(instr->variables[0]->var->type); - - LLVMValueRef src0 = get_src(ctx, instr->src[0]); - LLVMValueRef coords[4]; - LLVMValueRef masks[] = { - LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false), - LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false), - }; - LLVMValueRef res; - LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[1]), 0); - - int count; - enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); - bool is_array = glsl_sampler_type_is_array(type); - bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS || - dim == GLSL_SAMPLER_DIM_SUBPASS_MS); - bool is_ms = (dim == GLSL_SAMPLER_DIM_MS || - dim == GLSL_SAMPLER_DIM_SUBPASS_MS); - bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D; - count = image_type_to_components_count(dim, is_array); - - if (is_ms) { - LLVMValueRef fmask_load_address[3]; - int chan; - - fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], ""); - fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], ""); - if (is_array) - fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], ""); - else - fmask_load_address[2] = NULL; - if (add_frag_pos) { - for (chan = 0; chan < 2; ++chan) - fmask_load_address[chan] = - LLVMBuildAdd(ctx->ac.builder, fmask_load_address[chan], - LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan], - ctx->ac.i32, ""), ""); - fmask_load_address[2] = ac_to_integer(&ctx->ac, ctx->abi->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]); + case nir_op_fmin: + result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum", + ac_to_float_type(&ctx->ac, def_type), src[0], src[1]); + if (ctx->ac.chip_class < GFX9 && + instr->dest.dest.ssa.bit_size == 32) { + /* Only pre-GFX9 chips do not flush denorms. 
*/ + result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize", + ac_to_float_type(&ctx->ac, def_type), + result); } - sample_index = adjust_sample_index_using_fmask(&ctx->ac, - fmask_load_address[0], - fmask_load_address[1], - fmask_load_address[2], - sample_index, - get_sampler_desc(ctx, instr->variables[0], AC_DESC_FMASK, NULL, true, false)); - } - if (count == 1 && !gfx9_1d) { - if (instr->src[0].ssa->num_components) - res = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], ""); + break; + case nir_op_ffma: + result = emit_intrin_3f_param(&ctx->ac, "llvm.fmuladd", + ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]); + break; + case nir_op_ldexp: + src[0] = ac_to_float(&ctx->ac, src[0]); + if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 32) + result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2, AC_FUNC_ATTR_READNONE); else - res = src0; - } else { - int chan; - if (is_ms) - count--; - for (chan = 0; chan < count; ++chan) { - coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan); - } - if (add_frag_pos) { - for (chan = 0; chan < 2; ++chan) - coords[chan] = LLVMBuildAdd(ctx->ac.builder, coords[chan], LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan], - ctx->ac.i32, ""), ""); - coords[2] = ac_to_integer(&ctx->ac, ctx->abi->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]); - count++; - } - - if (gfx9_1d) { - if (is_array) { - coords[2] = coords[1]; - coords[1] = ctx->ac.i32_0; - } else - coords[1] = ctx->ac.i32_0; - count++; - } - - if (is_ms) { - coords[count] = sample_index; - count++; - } - - if (count == 3) { - coords[3] = LLVMGetUndef(ctx->ac.i32); - count = 4; + result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2, AC_FUNC_ATTR_READNONE); + break; + case nir_op_ibitfield_extract: + result = emit_bitfield_extract(&ctx->ac, true, src); + break; + case nir_op_ubitfield_extract: + result = emit_bitfield_extract(&ctx->ac, false, src); + break; + case nir_op_bitfield_insert: + result = emit_bitfield_insert(&ctx->ac, src[0], src[1], src[2], src[3]); + break; + case nir_op_bitfield_reverse: + result = ac_build_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE); + break; + case nir_op_bit_count: + if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 32) + result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE); + else { + result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i64", ctx->ac.i64, src, 1, AC_FUNC_ATTR_READNONE); + result = LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i32, ""); } - res = ac_build_gather_values(&ctx->ac, coords, count); - } - return res; -} - -static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr) -{ - LLVMValueRef params[7]; - LLVMValueRef res; - char intrinsic_name[64]; - const nir_variable *var = instr->variables[0]->var; - const struct glsl_type *type = var->type; - - if(instr->variables[0]->deref.child) - type = instr->variables[0]->deref.child->type; - - type = glsl_without_array(type); - - const enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); - if (dim == GLSL_SAMPLER_DIM_BUF) { - unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa); - unsigned num_channels = util_last_bit(mask); - LLVMValueRef rsrc, vindex; + break; + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) + src[i] = ac_to_integer(&ctx->ac, src[i]); + result = 
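/*
 * The FMASK-based remap done by adjust_sample_index_using_fmask() above is,
 * once the fmask dword has been fetched, just the nibble lookup from the
 * comment. A minimal scalar sketch (remap_sample is a hypothetical helper,
 * not part of this file):
 *
 *   #include <stdint.h>
 *
 *   static uint32_t remap_sample(uint32_t fmask, uint32_t sample_index)
 *   {
 *       // Nibble i holds the physical sample backing logical sample i.
 *       return (fmask >> (sample_index * 4)) & 0xF;
 *   }
 *
 * For the 0x11111100 example in the comment, this returns 0 for logical
 * samples 0 and 1, and 1 for samples 2..7, matching the described mapping.
 */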
ac_build_gather_values(&ctx->ac, src, num_components); + break; + case nir_op_f2i32: + case nir_op_f2i64: + src[0] = ac_to_float(&ctx->ac, src[0]); + result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, ""); + break; + case nir_op_f2u32: + case nir_op_f2u64: + src[0] = ac_to_float(&ctx->ac, src[0]); + result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, ""); + break; + case nir_op_i2f32: + case nir_op_i2f64: + src[0] = ac_to_integer(&ctx->ac, src[0]); + result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); + break; + case nir_op_u2f32: + case nir_op_u2f64: + src[0] = ac_to_integer(&ctx->ac, src[0]); + result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); + break; + case nir_op_f2f64: + src[0] = ac_to_float(&ctx->ac, src[0]); + result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); + break; + case nir_op_f2f32: + src[0] = ac_to_float(&ctx->ac, src[0]); + result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), ""); + break; + case nir_op_u2u32: + case nir_op_u2u64: + src[0] = ac_to_integer(&ctx->ac, src[0]); + if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) + result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, ""); + else + result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, ""); + break; + case nir_op_i2i32: + case nir_op_i2i64: + src[0] = ac_to_integer(&ctx->ac, src[0]); + if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type)) + result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, ""); + else + result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, ""); + break; + case nir_op_bcsel: + result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]); + break; + case nir_op_find_lsb: + src[0] = ac_to_integer(&ctx->ac, src[0]); + result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]); + break; + case nir_op_ufind_msb: + src[0] = ac_to_integer(&ctx->ac, src[0]); + result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32); + break; + case nir_op_ifind_msb: + src[0] = ac_to_integer(&ctx->ac, src[0]); + result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32); + break; + case nir_op_uadd_carry: + src[0] = ac_to_integer(&ctx->ac, src[0]); + src[1] = ac_to_integer(&ctx->ac, src[1]); + result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]); + break; + case nir_op_usub_borrow: + src[0] = ac_to_integer(&ctx->ac, src[0]); + src[1] = ac_to_integer(&ctx->ac, src[1]); + result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]); + break; + case nir_op_b2f: + result = emit_b2f(&ctx->ac, src[0]); + break; + case nir_op_f2b: + result = emit_f2b(&ctx->ac, src[0]); + break; + case nir_op_b2i: + result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size); + break; + case nir_op_i2b: + src[0] = ac_to_integer(&ctx->ac, src[0]); + result = emit_i2b(&ctx->ac, src[0]); + break; + case nir_op_fquantize2f16: + result = emit_f2f16(&ctx->ac, src[0]); + break; + case nir_op_umul_high: + src[0] = ac_to_integer(&ctx->ac, src[0]); + src[1] = ac_to_integer(&ctx->ac, src[1]); + result = emit_umul_high(&ctx->ac, src[0], src[1]); + break; + case nir_op_imul_high: + src[0] = ac_to_integer(&ctx->ac, src[0]); + src[1] = ac_to_integer(&ctx->ac, src[1]); + result = emit_imul_high(&ctx->ac, src[0], src[1]); + break; + case nir_op_pack_half_2x16: + result = emit_pack_half_2x16(&ctx->ac, src[0]); + break; + case nir_op_unpack_half_2x16: + 
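/*
 * The u2u32/u2u64 and i2i32/i2i64 cases above pick between extension and
 * truncation purely by comparing source and destination bit widths; the two
 * families differ only in whether the extension is zero- or sign-filled.
 * The same rule as scalar C, assuming only 32- and 64-bit operands:
 *
 *   #include <stdint.h>
 *
 *   static uint64_t u2u64(uint32_t v) { return (uint64_t)v; } // zero-extend
 *   static int64_t  i2i64(int32_t  v) { return (int64_t)v;  } // sign-extend
 *   static uint32_t u2u32(uint64_t v) { return (uint32_t)v; } // truncate
 */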
result = emit_unpack_half_2x16(&ctx->ac, src[0]); + break; + case nir_op_fddx: + case nir_op_fddy: + case nir_op_fddx_fine: + case nir_op_fddy_fine: + case nir_op_fddx_coarse: + case nir_op_fddy_coarse: + result = emit_ddxy(ctx, instr->op, src[0]); + break; - rsrc = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, false); - vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]), + case nir_op_unpack_64_2x32_split_x: { + assert(ac_get_llvm_num_components(src[0]) == 1); + LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], + ctx->ac.v2i32, + ""); + result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_0, ""); - - /* TODO: set "glc" and "can_speculate" when OpenGL needs it. */ - res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex, - ctx->ac.i32_0, num_channels, - false, false); - res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels); - - res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components); - res = ac_to_integer(&ctx->ac, res); - } else { - LLVMValueRef da = glsl_is_array_image(type) ? ctx->ac.i1true : ctx->ac.i1false; - LLVMValueRef slc = ctx->ac.i1false; - - params[0] = get_image_coords(ctx, instr); - params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false); - params[2] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */ - params[3] = (var->data.image._volatile || var->data.image.coherent) ? - ctx->ac.i1true : ctx->ac.i1false; - params[4] = slc; - params[5] = ctx->ac.i1false; - params[6] = da; - - ac_get_image_intr_name("llvm.amdgcn.image.load", - ctx->ac.v4f32, /* vdata */ - LLVMTypeOf(params[0]), /* coords */ - LLVMTypeOf(params[1]), /* rsrc */ - intrinsic_name, sizeof(intrinsic_name)); - - res = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.v4f32, - params, 7, AC_FUNC_ATTR_READONLY); + break; } - return ac_to_integer(&ctx->ac, res); -} - -static void visit_image_store(struct ac_nir_context *ctx, - nir_intrinsic_instr *instr) -{ - LLVMValueRef params[8]; - char intrinsic_name[64]; - const nir_variable *var = instr->variables[0]->var; - const struct glsl_type *type = glsl_without_array(var->type); - const enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); - LLVMValueRef glc = ctx->ac.i1false; - bool force_glc = ctx->ac.chip_class == SI; - if (force_glc) - glc = ctx->ac.i1true; - - if (dim == GLSL_SAMPLER_DIM_BUF) { - params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */ - params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, true); - params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]), - ctx->ac.i32_0, ""); /* vindex */ - params[3] = ctx->ac.i32_0; /* voffset */ - params[4] = glc; /* glc */ - params[5] = ctx->ac.i1false; /* slc */ - ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt, - params, 6, 0); - } else { - LLVMValueRef da = glsl_is_array_image(type) ? ctx->ac.i1true : ctx->ac.i1false; - LLVMValueRef slc = ctx->ac.i1false; - - params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); - params[1] = get_image_coords(ctx, instr); /* coords */ - params[2] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, true); - params[3] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */ - params[4] = (force_glc || var->data.image._volatile || var->data.image.coherent) ? 
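/*
 * ac_get_image_intr_name() used by the image load/store paths above appends
 * the overloaded LLVM type names to the base intrinsic name. A sketch of
 * that mangling, assuming the common v4f32 data / v4i32 coords / v8i32 rsrc
 * overloads (build_name is a hypothetical stand-in):
 *
 *   #include <stdio.h>
 *
 *   // build_name("llvm.amdgcn.image.load", "v4f32", "v4i32", "v8i32", ...)
 *   //   -> "llvm.amdgcn.image.load.v4f32.v4i32.v8i32"
 *   static void build_name(const char *base, const char *vdata,
 *                          const char *coords, const char *rsrc,
 *                          char *buf, size_t bufsize)
 *   {
 *       snprintf(buf, bufsize, "%s.%s.%s.%s", base, vdata, coords, rsrc);
 *   }
 */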
- ctx->ac.i1true : ctx->ac.i1false; - params[5] = slc; - params[6] = ctx->ac.i1false; - params[7] = da; - - ac_get_image_intr_name("llvm.amdgcn.image.store", - LLVMTypeOf(params[0]), /* vdata */ - LLVMTypeOf(params[1]), /* coords */ - LLVMTypeOf(params[2]), /* rsrc */ - intrinsic_name, sizeof(intrinsic_name)); - ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.voidt, - params, 8, 0); + case nir_op_unpack_64_2x32_split_y: { + assert(ac_get_llvm_num_components(src[0]) == 1); + LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], + ctx->ac.v2i32, + ""); + result = LLVMBuildExtractElement(ctx->ac.builder, tmp, + ctx->ac.i32_1, ""); + break; } -} - -static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr) -{ - LLVMValueRef params[7]; - int param_count = 0; - const nir_variable *var = instr->variables[0]->var; - - const char *atomic_name; - char intrinsic_name[41]; - const struct glsl_type *type = glsl_without_array(var->type); - MAYBE_UNUSED int length; - - bool is_unsigned = glsl_get_sampler_result_type(type) == GLSL_TYPE_UINT; - - switch (instr->intrinsic) { - case nir_intrinsic_image_atomic_add: - atomic_name = "add"; - break; - case nir_intrinsic_image_atomic_min: - atomic_name = is_unsigned ? "umin" : "smin"; - break; - case nir_intrinsic_image_atomic_max: - atomic_name = is_unsigned ? "umax" : "smax"; - break; - case nir_intrinsic_image_atomic_and: - atomic_name = "and"; - break; - case nir_intrinsic_image_atomic_or: - atomic_name = "or"; - break; - case nir_intrinsic_image_atomic_xor: - atomic_name = "xor"; + case nir_op_pack_64_2x32_split: { + LLVMValueRef tmp = LLVMGetUndef(ctx->ac.v2i32); + tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp, + src[0], ctx->ac.i32_0, ""); + tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp, + src[1], ctx->ac.i32_1, ""); + result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, ""); break; - case nir_intrinsic_image_atomic_exchange: - atomic_name = "swap"; + } + + case nir_op_cube_face_coord: { + src[0] = ac_to_float(&ctx->ac, src[0]); + LLVMValueRef results[2]; + LLVMValueRef in[3]; + for (unsigned chan = 0; chan < 3; chan++) + in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan); + results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc", + ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE); + results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc", + ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE); + result = ac_build_gather_values(&ctx->ac, results, 2); break; - case nir_intrinsic_image_atomic_comp_swap: - atomic_name = "cmpswap"; + } + + case nir_op_cube_face_index: { + src[0] = ac_to_float(&ctx->ac, src[0]); + LLVMValueRef in[3]; + for (unsigned chan = 0; chan < 3; chan++) + in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan); + result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubeid", + ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE); break; + } + default: + fprintf(stderr, "Unknown NIR alu instr: "); + nir_print_instr(&instr->instr, stderr); + fprintf(stderr, "\n"); abort(); } - if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap) - params[param_count++] = get_src(ctx, instr->src[3]); - params[param_count++] = get_src(ctx, instr->src[2]); - - if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) { - params[param_count++] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, - NULL, true, true); - params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]), - ctx->ac.i32_0, ""); /* vindex */ - params[param_count++] = ctx->ac.i32_0; 
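/*
 * The unpack_64_2x32_split_x/y and pack_64_2x32_split cases above go through
 * a v2i32 bitcast; with the little-endian lane layout assumed there, that is
 * the usual shift/mask splitting. The same bit manipulation as scalar C:
 *
 *   #include <stdint.h>
 *
 *   static uint32_t unpack_x(uint64_t v) { return (uint32_t)v; }         // element 0
 *   static uint32_t unpack_y(uint64_t v) { return (uint32_t)(v >> 32); } // element 1
 *   static uint64_t pack_64_2x32(uint32_t x, uint32_t y)
 *   {
 *       return (uint64_t)x | ((uint64_t)y << 32);
 *   }
 */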
/* voffset */ - params[param_count++] = ctx->ac.i1false; /* slc */ - - length = snprintf(intrinsic_name, sizeof(intrinsic_name), - "llvm.amdgcn.buffer.atomic.%s", atomic_name); - } else { - char coords_type[8]; - - LLVMValueRef coords = params[param_count++] = get_image_coords(ctx, instr); - params[param_count++] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, - NULL, true, true); - params[param_count++] = ctx->ac.i1false; /* r128 */ - params[param_count++] = glsl_is_array_image(type) ? ctx->ac.i1true : ctx->ac.i1false; /* da */ - params[param_count++] = ctx->ac.i1false; /* slc */ - - build_int_type_name(LLVMTypeOf(coords), - coords_type, sizeof(coords_type)); - - length = snprintf(intrinsic_name, sizeof(intrinsic_name), - "llvm.amdgcn.image.atomic.%s.%s", atomic_name, coords_type); + if (result) { + assert(instr->dest.dest.is_ssa); + result = ac_to_integer(&ctx->ac, result); + _mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa, + result); } - - assert(length < sizeof(intrinsic_name)); - return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32, params, param_count, 0); } -static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr) +static void visit_load_const(struct ac_nir_context *ctx, + const nir_load_const_instr *instr) { - const nir_variable *var = instr->variables[0]->var; - const struct glsl_type *type = glsl_without_array(var->type); + LLVMValueRef values[4], value = NULL; + LLVMTypeRef element_type = + LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size); - struct ac_image_args args = { 0 }; - args.da = glsl_is_array_image(type); - args.dmask = 0xf; - args.resource = get_sampler_desc(ctx, instr->variables[0], - AC_DESC_IMAGE, NULL, true, false); - args.opcode = ac_image_get_resinfo; - args.addr = ctx->ac.i32_0; + for (unsigned i = 0; i < instr->def.num_components; ++i) { + switch (instr->def.bit_size) { + case 32: + values[i] = LLVMConstInt(element_type, + instr->value.u32[i], false); + break; + case 64: + values[i] = LLVMConstInt(element_type, + instr->value.u64[i], false); + break; + default: + fprintf(stderr, + "unsupported nir load_const bit_size: %d\n", + instr->def.bit_size); + abort(); + } + } + if (instr->def.num_components > 1) { + value = LLVMConstVector(values, instr->def.num_components); + } else + value = values[0]; - return ac_build_image_opcode(&ctx->ac, &args); + _mesa_hash_table_insert(ctx->defs, &instr->def, value); } -static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr) +static LLVMValueRef +get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor, bool in_elements) { - LLVMValueRef res; - const nir_variable *var = instr->variables[0]->var; - const struct glsl_type *type = glsl_without_array(var->type); - - if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) - return get_buffer_size(ctx, - get_sampler_desc(ctx, instr->variables[0], - AC_DESC_BUFFER, NULL, true, false), true); - - struct ac_image_args args = { 0 }; - - args.da = glsl_is_array_image(type); - args.dmask = 0xf; - args.resource = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false); - args.opcode = ac_image_get_resinfo; - args.addr = ctx->ac.i32_0; - - res = ac_build_image_opcode(&ctx->ac, &args); - - LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false); + LLVMValueRef size = + LLVMBuildExtractElement(ctx->ac.builder, descriptor, + LLVMConstInt(ctx->ac.i32, 2, false), ""); - if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE && - 
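/*
 * For reference, the snprintf-based name building in the removed
 * visit_image_atomic() above yields intrinsics such as
 *
 *   llvm.amdgcn.buffer.atomic.add          (GLSL_SAMPLER_DIM_BUF path)
 *   llvm.amdgcn.image.atomic.cmpswap.v4i32 (image path)
 *
 * where the trailing suffix comes from build_int_type_name() applied to the
 * coordinate vector type: "i32" for a scalar, "vNi32" for an N-wide vector.
 */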
glsl_sampler_type_is_array(type)) { - LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false); - LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, ""); - z = LLVMBuildSDiv(ctx->ac.builder, z, six, ""); - res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, ""); - } - if (ctx->ac.chip_class >= GFX9 && - glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_1D && - glsl_sampler_type_is_array(type)) { - LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, ""); - res = LLVMBuildInsertElement(ctx->ac.builder, res, layers, + /* VI only */ + if (ctx->ac.chip_class == VI && in_elements) { + /* On VI, the descriptor contains the size in bytes, + * but TXQ must return the size in elements. + * The stride is always non-zero for resources using TXQ. + */ + LLVMValueRef stride = + LLVMBuildExtractElement(ctx->ac.builder, descriptor, ctx->ac.i32_1, ""); + stride = LLVMBuildLShr(ctx->ac.builder, stride, + LLVMConstInt(ctx->ac.i32, 16, false), ""); + stride = LLVMBuildAnd(ctx->ac.builder, stride, + LLVMConstInt(ctx->ac.i32, 0x3fff, false), ""); + size = LLVMBuildUDiv(ctx->ac.builder, size, stride, ""); } - return res; -} - -#define NOOP_WAITCNT 0xf7f -#define LGKM_CNT 0x07f -#define VM_CNT 0xf70 - -static void emit_membar(struct ac_llvm_context *ac, - const nir_intrinsic_instr *instr) -{ - unsigned waitcnt = NOOP_WAITCNT; - - switch (instr->intrinsic) { - case nir_intrinsic_memory_barrier: - case nir_intrinsic_group_memory_barrier: - waitcnt &= VM_CNT & LGKM_CNT; - break; - case nir_intrinsic_memory_barrier_atomic_counter: - case nir_intrinsic_memory_barrier_buffer: - case nir_intrinsic_memory_barrier_image: - waitcnt &= VM_CNT; - break; - case nir_intrinsic_memory_barrier_shared: - waitcnt &= LGKM_CNT; - break; - default: - break; - } - if (waitcnt != NOOP_WAITCNT) - ac_build_waitcnt(ac, waitcnt); + return size; } -void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage) +/** + * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with + * intrinsic names). + */ +static void build_int_type_name( + LLVMTypeRef type, + char *buf, unsigned bufsize) { - /* SI only (thanks to a hw bug workaround): - * The real barrier instruction isn’t needed, because an entire patch - * always fits into a single wave. 
- */ - if (ac->chip_class == SI && stage == MESA_SHADER_TESS_CTRL) { - ac_build_waitcnt(ac, LGKM_CNT & VM_CNT); - return; - } - ac_build_intrinsic(ac, "llvm.amdgcn.s.barrier", - ac->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT); -} + assert(bufsize >= 6); -static void radv_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - ac_build_kill_if_false(&ctx->ac, visible); + if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) + snprintf(buf, bufsize, "v%ui32", + LLVMGetVectorSize(type)); + else + strcpy(buf, "i32"); } -static void emit_discard(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr) +static LLVMValueRef radv_lower_gather4_integer(struct ac_llvm_context *ctx, + struct ac_image_args *args, + const nir_tex_instr *instr) { - LLVMValueRef cond; - - if (instr->intrinsic == nir_intrinsic_discard_if) { - cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, - get_src(ctx, instr->src[0]), - ctx->ac.i32_0, ""); - } else { - assert(instr->intrinsic == nir_intrinsic_discard); - cond = LLVMConstInt(ctx->ac.i1, false, 0); - } + enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type); + LLVMValueRef coord = args->addr; + LLVMValueRef half_texel[2]; + LLVMValueRef compare_cube_wa = NULL; + LLVMValueRef result; + int c; + unsigned coord_vgpr_index = (unsigned)args->offset + (unsigned)args->compare; - ctx->abi->emit_kill(ctx->abi, cond); -} + //TODO Rect + { + struct ac_image_args txq_args = { 0 }; -static LLVMValueRef -visit_load_helper_invocation(struct ac_nir_context *ctx) -{ - LLVMValueRef result = ac_build_intrinsic(&ctx->ac, - "llvm.amdgcn.ps.live", - ctx->ac.i1, NULL, 0, - AC_FUNC_ATTR_READNONE); - result = LLVMBuildNot(ctx->ac.builder, result, ""); - return LLVMBuildSExt(ctx->ac.builder, result, ctx->ac.i32, ""); -} + txq_args.da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE; + txq_args.opcode = ac_image_get_resinfo; + txq_args.dmask = 0xf; + txq_args.addr = ctx->i32_0; + txq_args.resource = args->resource; + LLVMValueRef size = ac_build_image_opcode(ctx, &txq_args); -static LLVMValueRef -visit_load_local_invocation_index(struct ac_nir_context *ctx) -{ - LLVMValueRef result; - LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac); - result = LLVMBuildAnd(ctx->ac.builder, ctx->abi->tg_size, - LLVMConstInt(ctx->ac.i32, 0xfc0, false), ""); + for (c = 0; c < 2; c++) { + half_texel[c] = LLVMBuildExtractElement(ctx->builder, size, + LLVMConstInt(ctx->i32, c, false), ""); + half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, ""); + half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]); + half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c], + LLVMConstReal(ctx->f32, -0.5), ""); + } + } - return LLVMBuildAdd(ctx->ac.builder, result, thread_id, ""); -} + LLVMValueRef orig_coords = args->addr; -static LLVMValueRef -visit_load_subgroup_id(struct ac_nir_context *ctx) -{ - if (ctx->stage == MESA_SHADER_COMPUTE) { - LLVMValueRef result; - result = LLVMBuildAnd(ctx->ac.builder, ctx->abi->tg_size, - LLVMConstInt(ctx->ac.i32, 0xfc0, false), ""); - return LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 6, false), ""); - } else { - return LLVMConstInt(ctx->ac.i32, 0, false); + for (c = 0; c < 2; c++) { + LLVMValueRef tmp; + LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0); + tmp = LLVMBuildExtractElement(ctx->builder, coord, index, ""); + tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, ""); + tmp = 
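/*
 * The resinfo query and fdiv above compute a -0.5 / size bias per axis, and
 * the loop here adds it to the x/y gather coordinates; judging by the
 * chip_class <= VI guard at the call site below, tg4 selects the wrong texel
 * for integer formats without this nudge. The per-coordinate arithmetic as a
 * scalar sketch (size is the level-0 dimension from the resinfo result):
 *
 *   static float bias_gather_coord(float coord, unsigned size)
 *   {
 *       float half_texel = -0.5f / (float)size;
 *       return coord + half_texel;
 *   }
 */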
LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
+ tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
+ coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
+ }

+ /*
+ * Apparently cube has an issue with integer types that the workaround
+ * doesn't solve, so if the format is 8_8_8_8 and the type is integer,
+ * do an alternate workaround by sampling using a scaled type and
+ * converting.
+ * This is taken from amdgpu-pro shaders.
+ */
+ /* NOTE this produces some ugly code compared to amdgpu-pro,
+ * LLVM ends up dumping SGPRs into VGPRs to deal with the compare/select,
+ * and then reads them back. -pro generates two selects,
+ * one s_cmp for the descriptor rewriting and
+ * one v_cmp for the coordinate and result changes.
+ */
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+ LLVMValueRef tmp, tmp2;

+ /* workaround 8/8/8/8 uint/sint cube gather bug */
+ /* first detect it then change to a scaled read and f2i */
+ tmp = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, "");
+ tmp2 = tmp;

+ /* extract the DATA_FORMAT */
+ tmp = ac_build_bfe(ctx, tmp, LLVMConstInt(ctx->i32, 20, false),
+ LLVMConstInt(ctx->i32, 6, false), false);

+ /* is the DATA_FORMAT == 8_8_8_8 */
+ compare_cube_wa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tmp, LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), "");

+ if (stype == GLSL_TYPE_UINT)
+ /* Create a NUM FORMAT - 0x2 or 0x4 - USCALED or UINT */
+ tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0x8000000, false),
+ LLVMConstInt(ctx->i32, 0x10000000, false), "");
+ else
+ /* Create a NUM FORMAT - 0x3 or 0x5 - SSCALED or SINT */
+ tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0xc000000, false),
+ LLVMConstInt(ctx->i32, 0x14000000, false), "");

+ /* replace the NUM FORMAT in the descriptor */
+ tmp2 = LLVMBuildAnd(ctx->builder, tmp2, LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false), "");
+ tmp2 = LLVMBuildOr(ctx->builder, tmp2, tmp, "");

+ args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32_1, "");
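/*
 * The masking above rewrites the NUM_FORMAT field of resource word 1 in
 * place. Assuming the SI descriptor layout implied by the constants used
 * here (DATA_FORMAT in bits 20..25, NUM_FORMAT in bits 26..29), the selected
 * values decode as:
 *
 *   0x8000000  = 2 << 26   NUM_FORMAT = USCALED
 *   0xc000000  = 3 << 26   NUM_FORMAT = SSCALED
 *   0x10000000 = 4 << 26   NUM_FORMAT = UINT
 *   0x14000000 = 5 << 26   NUM_FORMAT = SINT
 *
 * i.e. UINT/SINT is downgraded to USCALED/SSCALED when the 8_8_8_8 cube
 * workaround fires, and the scaled float result is converted back with
 * fptoui/fptosi further down.
 */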
-static void -visit_store_shared(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr) -{ - LLVMValueRef derived_ptr, data,index; - LLVMBuilderRef builder = ctx->ac.builder; + /* don't modify the coordinates for this case */ + coord = LLVMBuildSelect(ctx->builder, compare_cube_wa, orig_coords, coord, ""); + } + args->addr = coord; + result = ac_build_image_opcode(ctx, args); - LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[1]); - LLVMValueRef src = get_src(ctx, instr->src[0]); + if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { + LLVMValueRef tmp, tmp2; - int writemask = nir_intrinsic_write_mask(instr); - for (int chan = 0; chan < 4; chan++) { - if (!(writemask & (1 << chan))) { - continue; + /* if the cube workaround is in place, f2i the result. */ + for (c = 0; c < 4; c++) { + tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), ""); + if (stype == GLSL_TYPE_UINT) + tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, ""); + else + tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, ""); + tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, ""); + tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, ""); + tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, tmp2, tmp, ""); + tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, ""); + result = LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), ""); } - data = ac_llvm_extract_elem(&ctx->ac, src, chan); - index = LLVMConstInt(ctx->ac.i32, chan, 0); - derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, ""); - LLVMBuildStore(builder, data, derived_ptr); } + return result; } -static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr, - LLVMValueRef ptr, int src_idx) +static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, + const nir_tex_instr *instr, + bool lod_is_zero, + struct ac_image_args *args) { - LLVMValueRef result; - LLVMValueRef src = get_src(ctx, instr->src[src_idx]); + if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { + unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa); - if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap || - instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap) { - LLVMValueRef src1 = get_src(ctx, instr->src[src_idx + 1]); - result = LLVMBuildAtomicCmpXchg(ctx->ac.builder, - ptr, src, src1, - LLVMAtomicOrderingSequentiallyConsistent, - LLVMAtomicOrderingSequentiallyConsistent, - false); - } else { - LLVMAtomicRMWBinOp op; - switch (instr->intrinsic) { - case nir_intrinsic_var_atomic_add: - case nir_intrinsic_shared_atomic_add: - op = LLVMAtomicRMWBinOpAdd; - break; - case nir_intrinsic_var_atomic_umin: - case nir_intrinsic_shared_atomic_umin: - op = LLVMAtomicRMWBinOpUMin; - break; - case nir_intrinsic_var_atomic_umax: - case nir_intrinsic_shared_atomic_umax: - op = LLVMAtomicRMWBinOpUMax; - break; - case nir_intrinsic_var_atomic_imin: - case nir_intrinsic_shared_atomic_imin: - op = LLVMAtomicRMWBinOpMin; - break; - case nir_intrinsic_var_atomic_imax: - case nir_intrinsic_shared_atomic_imax: - op = LLVMAtomicRMWBinOpMax; - break; - case nir_intrinsic_var_atomic_and: - case nir_intrinsic_shared_atomic_and: - op = LLVMAtomicRMWBinOpAnd; - break; - case nir_intrinsic_var_atomic_or: - case nir_intrinsic_shared_atomic_or: - op = LLVMAtomicRMWBinOpOr; - break; - case nir_intrinsic_var_atomic_xor: - case nir_intrinsic_shared_atomic_xor: - op = LLVMAtomicRMWBinOpXor; - break; - case nir_intrinsic_var_atomic_exchange: - case nir_intrinsic_shared_atomic_exchange: - 
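/*
 * Each case in this switch only selects an LLVM atomicrmw opcode; the
 * LLVMBuildAtomicRMW call below always uses sequentially-consistent
 * ordering. Semantically that is the C11 read-modify-write, e.g. for the
 * shared (LDS) add case, sketched on a plain pointer:
 *
 *   #include <stdatomic.h>
 *   #include <stdint.h>
 *
 *   static uint32_t shared_atomic_add(_Atomic uint32_t *lds, uint32_t v)
 *   {
 *       // Returns the value held before the add, like atomicrmw does.
 *       return atomic_fetch_add_explicit(lds, v, memory_order_seq_cst);
 *   }
 */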
op = LLVMAtomicRMWBinOpXchg; - break; - default: - return NULL; - } + return ac_build_buffer_load_format(&ctx->ac, + args->resource, + args->addr, + ctx->ac.i32_0, + util_last_bit(mask), + false, true); + } - result = LLVMBuildAtomicRMW(ctx->ac.builder, op, ptr, ac_to_integer(&ctx->ac, src), - LLVMAtomicOrderingSequentiallyConsistent, - false); + args->opcode = ac_image_sample; + args->compare = instr->is_shadow; + + switch (instr->op) { + case nir_texop_txf: + case nir_texop_txf_ms: + case nir_texop_samples_identical: + args->opcode = lod_is_zero || + instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? + ac_image_load : ac_image_load_mip; + args->compare = false; + args->offset = false; + break; + case nir_texop_txb: + args->bias = true; + break; + case nir_texop_txl: + if (lod_is_zero) + args->level_zero = true; + else + args->lod = true; + break; + case nir_texop_txs: + case nir_texop_query_levels: + args->opcode = ac_image_get_resinfo; + break; + case nir_texop_tex: + if (ctx->stage != MESA_SHADER_FRAGMENT) + args->level_zero = true; + break; + case nir_texop_txd: + args->deriv = true; + break; + case nir_texop_tg4: + args->opcode = ac_image_gather4; + args->level_zero = true; + break; + case nir_texop_lod: + args->opcode = ac_image_get_lod; + args->compare = false; + args->offset = false; + break; + default: + break; + } + + if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= VI) { + enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type); + if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) { + return radv_lower_gather4_integer(&ctx->ac, args, instr); + } } - return result; + return ac_build_image_opcode(&ctx->ac, args); } -static LLVMValueRef lookup_interp_param(struct ac_shader_abi *abi, - enum glsl_interp_mode interp, unsigned location) +static LLVMValueRef visit_vulkan_resource_reindex(struct ac_nir_context *ctx, + nir_intrinsic_instr *instr) { - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + LLVMValueRef ptr = get_src(ctx, instr->src[0]); + LLVMValueRef index = get_src(ctx, instr->src[1]); - switch (interp) { - case INTERP_MODE_FLAT: - default: - return NULL; - case INTERP_MODE_SMOOTH: - case INTERP_MODE_NONE: - if (location == INTERP_CENTER) - return ctx->persp_center; - else if (location == INTERP_CENTROID) - return ctx->persp_centroid; - else if (location == INTERP_SAMPLE) - return ctx->persp_sample; - break; - case INTERP_MODE_NOPERSPECTIVE: - if (location == INTERP_CENTER) - return ctx->linear_center; - else if (location == INTERP_CENTROID) - return ctx->linear_centroid; - else if (location == INTERP_SAMPLE) - return ctx->linear_sample; - break; - } - return NULL; + LLVMValueRef result = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, ""); + LLVMSetMetadata(result, ctx->ac.uniform_md_kind, ctx->ac.empty_md); + return result; } -static LLVMValueRef load_sample_position(struct ac_shader_abi *abi, - LLVMValueRef sample_id) +static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx, + nir_intrinsic_instr *instr) { - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + LLVMValueRef ptr, addr; - LLVMValueRef result; - LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_PS_SAMPLE_POSITIONS, false)); + addr = LLVMConstInt(ctx->ac.i32, nir_intrinsic_base(instr), 0); + addr = LLVMBuildAdd(ctx->ac.builder, addr, + get_src(ctx, instr->src[0]), ""); + + ptr = ac_build_gep0(&ctx->ac, ctx->abi->push_constants, addr); + ptr = ac_cast_ptr(&ctx->ac, ptr, 
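/*
 * visit_load_push_constant() here is plain byte addressing into the push
 * constant block: the constant base from the intrinsic plus the dynamic
 * source offset, then a load at the destination type. The addressing as a
 * C sketch over a raw byte view (load_push_u32 is a hypothetical helper):
 *
 *   #include <stdint.h>
 *   #include <string.h>
 *
 *   static uint32_t load_push_u32(const uint8_t *push_constants,
 *                                 uint32_t base, uint32_t offset)
 *   {
 *       uint32_t v;
 *       memcpy(&v, push_constants + base + offset, sizeof(v));
 *       return v;
 *   }
 */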
get_def_type(ctx, &instr->dest.ssa)); - ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, - ac_array_in_const_addr_space(ctx->ac.v2f32), ""); + return LLVMBuildLoad(ctx->ac.builder, ptr, ""); +} - sample_id = LLVMBuildAdd(ctx->ac.builder, sample_id, ctx->sample_pos_offset, ""); - result = ac_build_load_invariant(&ctx->ac, ptr, sample_id); +static LLVMValueRef visit_get_buffer_size(struct ac_nir_context *ctx, + const nir_intrinsic_instr *instr) +{ + LLVMValueRef index = get_src(ctx, instr->src[0]); - return result; + return get_buffer_size(ctx, ctx->abi->load_ssbo(ctx->abi, index, false), false); } -static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx) +static uint32_t widen_mask(uint32_t mask, unsigned multiplier) { - LLVMValueRef values[2]; - LLVMValueRef pos[2]; + uint32_t new_mask = 0; + for(unsigned i = 0; i < 32 && (1u << i) <= mask; ++i) + if (mask & (1u << i)) + new_mask |= ((1u << multiplier) - 1u) << (i * multiplier); + return new_mask; +} - pos[0] = ac_to_float(&ctx->ac, ctx->abi->frag_pos[0]); - pos[1] = ac_to_float(&ctx->ac, ctx->abi->frag_pos[1]); +static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src, + unsigned start, unsigned count) +{ + LLVMTypeRef type = LLVMTypeOf(src); - values[0] = ac_build_fract(&ctx->ac, pos[0], 32); - values[1] = ac_build_fract(&ctx->ac, pos[1], 32); - return ac_build_gather_values(&ctx->ac, values, 2); + if (LLVMGetTypeKind(type) != LLVMVectorTypeKind) { + assert(start == 0); + assert(count == 1); + return src; + } + + unsigned src_elements = LLVMGetVectorSize(type); + assert(start < src_elements); + assert(start + count <= src_elements); + + if (start == 0 && count == src_elements) + return src; + + if (count == 1) + return LLVMBuildExtractElement(ctx->builder, src, LLVMConstInt(ctx->i32, start, false), ""); + + assert(count <= 8); + LLVMValueRef indices[8]; + for (unsigned i = 0; i < count; ++i) + indices[i] = LLVMConstInt(ctx->i32, start + i, false); + + LLVMValueRef swizzle = LLVMConstVector(indices, count); + return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, ""); } -static LLVMValueRef load_sample_mask_in(struct ac_shader_abi *abi) +static void visit_store_ssbo(struct ac_nir_context *ctx, + nir_intrinsic_instr *instr) { - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - uint8_t log2_ps_iter_samples = ctx->shader_info->info.ps.force_persample ? - ctx->options->key.fs.log2_num_samples : - ctx->options->key.fs.log2_ps_iter_samples; - - /* The bit pattern matches that used by fixed function fragment - * processing. 
*/ - static const uint16_t ps_iter_masks[] = { - 0xffff, /* not used */ - 0x5555, - 0x1111, - 0x0101, - 0x0001, - }; - assert(log2_ps_iter_samples < ARRAY_SIZE(ps_iter_masks)); + const char *store_name; + LLVMValueRef src_data = get_src(ctx, instr->src[0]); + LLVMTypeRef data_type = ctx->ac.f32; + int elem_size_mult = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 32; + int components_32bit = elem_size_mult * instr->num_components; + unsigned writemask = nir_intrinsic_write_mask(instr); + LLVMValueRef base_data, base_offset; + LLVMValueRef params[6]; + + params[1] = ctx->abi->load_ssbo(ctx->abi, + get_src(ctx, instr->src[1]), true); + params[2] = ctx->ac.i32_0; /* vindex */ + params[4] = ctx->ac.i1false; /* glc */ + params[5] = ctx->ac.i1false; /* slc */ + + if (components_32bit > 1) + data_type = LLVMVectorType(ctx->ac.f32, components_32bit); - uint32_t ps_iter_mask = ps_iter_masks[log2_ps_iter_samples]; + writemask = widen_mask(writemask, elem_size_mult); - LLVMValueRef result, sample_id; - sample_id = ac_unpack_param(&ctx->ac, abi->ancillary, 8, 4); - sample_id = LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, ps_iter_mask, false), sample_id, ""); - result = LLVMBuildAnd(ctx->ac.builder, sample_id, abi->sample_coverage, ""); - return result; + base_data = ac_to_float(&ctx->ac, src_data); + base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components); + base_data = LLVMBuildBitCast(ctx->ac.builder, base_data, + data_type, ""); + base_offset = get_src(ctx, instr->src[2]); /* voffset */ + while (writemask) { + int start, count; + LLVMValueRef data; + LLVMValueRef offset; + + u_bit_scan_consecutive_range(&writemask, &start, &count); + + /* Due to an LLVM limitation, split 3-element writes + * into a 2-element and a 1-element write. 
*/ + if (count == 3) { + writemask |= 1 << (start + 2); + count = 2; + } + + if (count > 4) { + writemask |= ((1u << (count - 4)) - 1u) << (start + 4); + count = 4; + } + + if (count == 4) { + store_name = "llvm.amdgcn.buffer.store.v4f32"; + } else if (count == 2) { + store_name = "llvm.amdgcn.buffer.store.v2f32"; + + } else { + assert(count == 1); + store_name = "llvm.amdgcn.buffer.store.f32"; + } + data = extract_vector_range(&ctx->ac, base_data, start, count); + + offset = base_offset; + if (start != 0) { + offset = LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, start * 4, false), ""); + } + params[0] = data; + params[3] = offset; + ac_build_intrinsic(&ctx->ac, store_name, + ctx->ac.voidt, params, 6, 0); + } } -static LLVMValueRef visit_interp(struct ac_nir_context *ctx, - const nir_intrinsic_instr *instr) +static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, + const nir_intrinsic_instr *instr) { - LLVMValueRef result[4]; - LLVMValueRef interp_param, attr_number; - unsigned location; - unsigned chan; - LLVMValueRef src_c0 = NULL; - LLVMValueRef src_c1 = NULL; - LLVMValueRef src0 = NULL; - int input_index = instr->variables[0]->var->data.location - VARYING_SLOT_VAR0; + const char *name; + LLVMValueRef params[6]; + int arg_count = 0; + + if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) { + params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0); + } + params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0); + params[arg_count++] = ctx->abi->load_ssbo(ctx->abi, + get_src(ctx, instr->src[0]), + true); + params[arg_count++] = ctx->ac.i32_0; /* vindex */ + params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */ + params[arg_count++] = LLVMConstInt(ctx->ac.i1, 0, false); /* slc */ + switch (instr->intrinsic) { - case nir_intrinsic_interp_var_at_centroid: - location = INTERP_CENTROID; + case nir_intrinsic_ssbo_atomic_add: + name = "llvm.amdgcn.buffer.atomic.add"; break; - case nir_intrinsic_interp_var_at_sample: - case nir_intrinsic_interp_var_at_offset: - location = INTERP_CENTER; - src0 = get_src(ctx, instr->src[0]); + case nir_intrinsic_ssbo_atomic_imin: + name = "llvm.amdgcn.buffer.atomic.smin"; break; - default: + case nir_intrinsic_ssbo_atomic_umin: + name = "llvm.amdgcn.buffer.atomic.umin"; + break; + case nir_intrinsic_ssbo_atomic_imax: + name = "llvm.amdgcn.buffer.atomic.smax"; + break; + case nir_intrinsic_ssbo_atomic_umax: + name = "llvm.amdgcn.buffer.atomic.umax"; + break; + case nir_intrinsic_ssbo_atomic_and: + name = "llvm.amdgcn.buffer.atomic.and"; + break; + case nir_intrinsic_ssbo_atomic_or: + name = "llvm.amdgcn.buffer.atomic.or"; break; + case nir_intrinsic_ssbo_atomic_xor: + name = "llvm.amdgcn.buffer.atomic.xor"; + break; + case nir_intrinsic_ssbo_atomic_exchange: + name = "llvm.amdgcn.buffer.atomic.swap"; + break; + case nir_intrinsic_ssbo_atomic_comp_swap: + name = "llvm.amdgcn.buffer.atomic.cmpswap"; + break; + default: + abort(); } - if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) { - src_c0 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, src0, ctx->ac.i32_0, "")); - src_c1 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, src0, ctx->ac.i32_1, "")); - } else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) { - LLVMValueRef sample_position; - LLVMValueRef halfval = LLVMConstReal(ctx->ac.f32, 0.5f); - - /* fetch sample ID */ - sample_position = ctx->abi->load_sample_position(ctx->abi, src0); - - src_c0 = 
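/*
 * Two pieces of mask arithmetic in visit_store_ssbo() above deserve a worked
 * example. widen_mask() rescales a per-element writemask when each element
 * spans several dwords, e.g. for 64-bit data (multiplier == 2):
 *
 *   widen_mask(0x5, 2) == 0x33   (binary 101 -> 110011,
 *                                 elements 0,2 -> dwords 0,1 and 4,5)
 *
 * The store loop then walks consecutive runs of the widened mask; because of
 * the LLVM limitation noted above there is no 3-dword store, so a run of
 * three becomes a v2f32 store followed by an f32 store:
 *
 *   mask 0x7 -> buffer.store.v2f32 at byte 0, buffer.store.f32 at byte 8
 */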
LLVMBuildExtractElement(ctx->ac.builder, sample_position, ctx->ac.i32_0, ""); - src_c0 = LLVMBuildFSub(ctx->ac.builder, src_c0, halfval, ""); - src_c1 = LLVMBuildExtractElement(ctx->ac.builder, sample_position, ctx->ac.i32_1, ""); - src_c1 = LLVMBuildFSub(ctx->ac.builder, src_c1, halfval, ""); - } - interp_param = ctx->abi->lookup_interp_param(ctx->abi, instr->variables[0]->var->data.interpolation, location); - attr_number = LLVMConstInt(ctx->ac.i32, input_index, false); + return ac_build_intrinsic(&ctx->ac, name, ctx->ac.i32, params, arg_count, 0); +} - if (location == INTERP_CENTER) { - LLVMValueRef ij_out[2]; - LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param); +static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, + const nir_intrinsic_instr *instr) +{ + LLVMValueRef results[2]; + int load_components; + int num_components = instr->num_components; + if (instr->dest.ssa.bit_size == 64) + num_components *= 2; - /* - * take the I then J parameters, and the DDX/Y for it, and - * calculate the IJ inputs for the interpolator. - * temp1 = ddx * offset/sample.x + I; - * interp_param.I = ddy * offset/sample.y + temp1; - * temp1 = ddx * offset/sample.x + J; - * interp_param.J = ddy * offset/sample.y + temp1; - */ - for (unsigned i = 0; i < 2; i++) { - LLVMValueRef ix_ll = LLVMConstInt(ctx->ac.i32, i, false); - LLVMValueRef iy_ll = LLVMConstInt(ctx->ac.i32, i + 2, false); - LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder, - ddxy_out, ix_ll, ""); - LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder, - ddxy_out, iy_ll, ""); - LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder, - interp_param, ix_ll, ""); - LLVMValueRef temp1, temp2; + for (int i = 0; i < num_components; i += load_components) { + load_components = MIN2(num_components - i, 4); + const char *load_name; + LLVMTypeRef data_type = ctx->ac.f32; + LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * 4, false); + offset = LLVMBuildAdd(ctx->ac.builder, get_src(ctx, instr->src[1]), offset, ""); - interp_el = LLVMBuildBitCast(ctx->ac.builder, interp_el, - ctx->ac.f32, ""); + if (load_components == 3) + data_type = LLVMVectorType(ctx->ac.f32, 4); + else if (load_components > 1) + data_type = LLVMVectorType(ctx->ac.f32, load_components); - temp1 = LLVMBuildFMul(ctx->ac.builder, ddx_el, src_c0, ""); - temp1 = LLVMBuildFAdd(ctx->ac.builder, temp1, interp_el, ""); + if (load_components >= 3) + load_name = "llvm.amdgcn.buffer.load.v4f32"; + else if (load_components == 2) + load_name = "llvm.amdgcn.buffer.load.v2f32"; + else if (load_components == 1) + load_name = "llvm.amdgcn.buffer.load.f32"; + else + unreachable("unhandled number of components"); - temp2 = LLVMBuildFMul(ctx->ac.builder, ddy_el, src_c1, ""); - temp2 = LLVMBuildFAdd(ctx->ac.builder, temp2, temp1, ""); + LLVMValueRef params[] = { + ctx->abi->load_ssbo(ctx->abi, + get_src(ctx, instr->src[0]), + false), + ctx->ac.i32_0, + offset, + ctx->ac.i1false, + ctx->ac.i1false, + }; - ij_out[i] = LLVMBuildBitCast(ctx->ac.builder, - temp2, ctx->ac.i32, ""); - } - interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2); + results[i > 0 ? 
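/*
 * The loop in visit_load_buffer() above fetches at most four dwords per
 * llvm.amdgcn.buffer.load call, so results[0] holds the first chunk and
 * results[1] a possible second one; the shuffle below stitches them back
 * together. In summary (dword components, after 64-bit doubling):
 *
 *   1, 2, 4 components: one load, used directly
 *   3 components:       one v4f32 load, trimmed by the shuffle
 *   5..8 components:    two loads merged by the shuffle
 *
 * The byte offset of the chunk starting at component i is the base source
 * offset plus i * 4.
 */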
1 : 0] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0); + } + + assume(results[0]); + LLVMValueRef ret = results[0]; + if (num_components > 4 || num_components == 3) { + LLVMValueRef masks[] = { + LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false), + LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false), + LLVMConstInt(ctx->ac.i32, 4, false), LLVMConstInt(ctx->ac.i32, 5, false), + LLVMConstInt(ctx->ac.i32, 6, false), LLVMConstInt(ctx->ac.i32, 7, false) + }; + LLVMValueRef swizzle = LLVMConstVector(masks, num_components); + ret = LLVMBuildShuffleVector(ctx->ac.builder, results[0], + results[num_components > 4 ? 1 : 0], swizzle, ""); } - for (chan = 0; chan < 4; chan++) { - LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); + return LLVMBuildBitCast(ctx->ac.builder, ret, + get_def_type(ctx, &instr->dest.ssa), ""); +} - if (interp_param) { - interp_param = LLVMBuildBitCast(ctx->ac.builder, - interp_param, ctx->ac.v2f32, ""); - LLVMValueRef i = LLVMBuildExtractElement( - ctx->ac.builder, interp_param, ctx->ac.i32_0, ""); - LLVMValueRef j = LLVMBuildExtractElement( - ctx->ac.builder, interp_param, ctx->ac.i32_1, ""); +static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, + const nir_intrinsic_instr *instr) +{ + LLVMValueRef ret; + LLVMValueRef rsrc = get_src(ctx, instr->src[0]); + LLVMValueRef offset = get_src(ctx, instr->src[1]); + int num_components = instr->num_components; - result[chan] = ac_build_fs_interp(&ctx->ac, - llvm_chan, attr_number, - ctx->abi->prim_mask, i, j); - } else { - result[chan] = ac_build_fs_interp_mov(&ctx->ac, - LLVMConstInt(ctx->ac.i32, 2, false), - llvm_chan, attr_number, - ctx->abi->prim_mask); - } - } - return ac_build_varying_gather_values(&ctx->ac, result, instr->num_components, - instr->variables[0]->var->data.location_frac); + if (ctx->abi->load_ubo) + rsrc = ctx->abi->load_ubo(ctx->abi, rsrc); + + if (instr->dest.ssa.bit_size == 64) + num_components *= 2; + + ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, + NULL, 0, false, false, true, true); + ret = ac_trim_vector(&ctx->ac, ret, num_components); + return LLVMBuildBitCast(ctx->ac.builder, ret, + get_def_type(ctx, &instr->dest.ssa), ""); } static void -visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs) +get_deref_offset(struct ac_nir_context *ctx, nir_deref_var *deref, + bool vs_in, unsigned *vertex_index_out, + LLVMValueRef *vertex_index_ref, + unsigned *const_out, LLVMValueRef *indir_out) { - LLVMValueRef gs_next_vertex; - LLVMValueRef can_emit; - int idx; - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + unsigned const_offset = 0; + nir_deref *tail = &deref->deref; + LLVMValueRef offset = NULL; - assert(stream == 0); + if (vertex_index_out != NULL || vertex_index_ref != NULL) { + tail = tail->child; + nir_deref_array *deref_array = nir_deref_as_array(tail); + if (vertex_index_out) + *vertex_index_out = deref_array->base_offset; - /* Write vertex attribute values to GSVS ring */ - gs_next_vertex = LLVMBuildLoad(ctx->ac.builder, - ctx->gs_next_vertex, - ""); + if (vertex_index_ref) { + LLVMValueRef vtx = LLVMConstInt(ctx->ac.i32, deref_array->base_offset, false); + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + vtx = LLVMBuildAdd(ctx->ac.builder, vtx, get_src(ctx, deref_array->indirect), ""); + } + *vertex_index_ref = vtx; + } + } - /* If this thread has already emitted the declared maximum number of - * vertices, 
kill it: excessive vertex emissions are not supposed to - * have any effect, and GS threads have no externally observable - * effects other than emitting vertices. - */ - can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex, - LLVMConstInt(ctx->ac.i32, ctx->gs_max_out_vertices, false), ""); - ac_build_kill_if_false(&ctx->ac, can_emit); - - /* loop num outputs */ - idx = 0; - for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - LLVMValueRef *out_ptr = &addrs[i * 4]; - int length = 4; - int slot = idx; - int slot_inc = 1; - - if (!(ctx->output_mask & (1ull << i))) - continue; + if (deref->var->data.compact) { + assert(tail->child->deref_type == nir_deref_type_array); + assert(glsl_type_is_scalar(glsl_without_array(deref->var->type))); + nir_deref_array *deref_array = nir_deref_as_array(tail->child); + /* We always lower indirect dereferences for "compact" array vars. */ + assert(deref_array->deref_array_type == nir_deref_array_type_direct); - if (i == VARYING_SLOT_CLIP_DIST0) { - /* pack clip and cull into a single set of slots */ - length = ctx->num_output_clips + ctx->num_output_culls; - if (length > 4) - slot_inc = 2; - } - for (unsigned j = 0; j < length; j++) { - LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, - out_ptr[j], ""); - LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, (slot * 4 + j) * ctx->gs_max_out_vertices, false); - voffset = LLVMBuildAdd(ctx->ac.builder, voffset, gs_next_vertex, ""); - voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), ""); - - out_val = LLVMBuildBitCast(ctx->ac.builder, out_val, ctx->ac.i32, ""); - - ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring, - out_val, 1, - voffset, ctx->gs2vs_offset, 0, - 1, 1, true, true); - } - idx += slot_inc; + const_offset = deref_array->base_offset; + goto out; } - gs_next_vertex = LLVMBuildAdd(ctx->ac.builder, gs_next_vertex, - ctx->ac.i32_1, ""); - LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex); + while (tail->child != NULL) { + const struct glsl_type *parent_type = tail->type; + tail = tail->child; - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (0 << 8), ctx->gs_wave_id); -} + if (tail->deref_type == nir_deref_type_array) { + nir_deref_array *deref_array = nir_deref_as_array(tail); + LLVMValueRef index, stride, local_offset; + unsigned size = glsl_count_attribute_slots(tail->type, vs_in); -static void -visit_end_primitive(struct ac_shader_abi *abi, unsigned stream) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8), ctx->gs_wave_id); -} + const_offset += size * deref_array->base_offset; + if (deref_array->deref_array_type == nir_deref_array_type_direct) + continue; -static LLVMValueRef -load_tess_coord(struct ac_shader_abi *abi) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + assert(deref_array->deref_array_type == nir_deref_array_type_indirect); + index = get_src(ctx, deref_array->indirect); + stride = LLVMConstInt(ctx->ac.i32, size, 0); + local_offset = LLVMBuildMul(ctx->ac.builder, stride, index, ""); - LLVMValueRef coord[4] = { - ctx->tes_u, - ctx->tes_v, - ctx->ac.f32_0, - ctx->ac.f32_0, - }; + if (offset) + offset = LLVMBuildAdd(ctx->ac.builder, offset, local_offset, ""); + else + offset = local_offset; + } else if (tail->deref_type == nir_deref_type_struct) { + nir_deref_struct *deref_struct = nir_deref_as_struct(tail); + + for (unsigned i = 0; i < deref_struct->index; i++) { 
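/*
 * get_deref_offset() above accumulates a compile-time part (const_out) and
 * an optional dynamic part (indir_out) while walking the deref chain, both
 * measured in attribute slots from glsl_count_attribute_slots(). Worked
 * example for a hypothetical `struct { vec4 a; float b[2]; } s[4]` accessed
 * as s[i].b[1], where vec4 and float each occupy one slot:
 *
 *   s[i]  array case:  indirect += i * 3   (3 slots per struct element)
 *   .b    struct case: const    += 1       (slots of the fields before b)
 *   [1]   array case:  const    += 1 * 1   (slot size of float times index)
 *
 * giving const_out == 2 with an indirect part of i * 3.
 */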
+ const struct glsl_type *ft = glsl_get_struct_field(parent_type, i); + const_offset += glsl_count_attribute_slots(ft, vs_in); + } + } else + unreachable("unsupported deref type"); - if (ctx->tes_primitive_mode == GL_TRIANGLES) - coord[2] = LLVMBuildFSub(ctx->ac.builder, ctx->ac.f32_1, - LLVMBuildFAdd(ctx->ac.builder, coord[0], coord[1], ""), ""); + } +out: + if (const_offset && offset) + offset = LLVMBuildAdd(ctx->ac.builder, offset, + LLVMConstInt(ctx->ac.i32, const_offset, 0), + ""); - return ac_build_gather_values(&ctx->ac, coord, 3); + *const_out = const_offset; + *indir_out = offset; } static LLVMValueRef -load_patch_vertices_in(struct ac_shader_abi *abi) +build_gep_for_deref(struct ac_nir_context *ctx, + nir_deref_var *deref) { - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - return LLVMConstInt(ctx->ac.i32, ctx->options->key.tcs.input_vertices, false); + struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, deref->var); + assert(entry->data); + LLVMValueRef val = entry->data; + nir_deref *tail = deref->deref.child; + while (tail != NULL) { + LLVMValueRef offset; + switch (tail->deref_type) { + case nir_deref_type_array: { + nir_deref_array *array = nir_deref_as_array(tail); + offset = LLVMConstInt(ctx->ac.i32, array->base_offset, 0); + if (array->deref_array_type == + nir_deref_array_type_indirect) { + offset = LLVMBuildAdd(ctx->ac.builder, offset, + get_src(ctx, + array->indirect), + ""); + } + break; + } + case nir_deref_type_struct: { + nir_deref_struct *deref_struct = + nir_deref_as_struct(tail); + offset = LLVMConstInt(ctx->ac.i32, + deref_struct->index, 0); + break; + } + default: + unreachable("bad deref type"); + } + val = ac_build_gep0(&ctx->ac, val, offset); + tail = tail->child; + } + return val; } -static void visit_intrinsic(struct ac_nir_context *ctx, - nir_intrinsic_instr *instr) +static LLVMValueRef load_tess_varyings(struct ac_nir_context *ctx, + nir_intrinsic_instr *instr, + bool load_inputs) { - LLVMValueRef result = NULL; + LLVMValueRef result; + LLVMValueRef vertex_index = NULL; + LLVMValueRef indir_index = NULL; + unsigned const_index = 0; + unsigned location = instr->variables[0]->var->data.location; + unsigned driver_location = instr->variables[0]->var->data.driver_location; + const bool is_patch = instr->variables[0]->var->data.patch; + const bool is_compact = instr->variables[0]->var->data.compact; - switch (instr->intrinsic) { - case nir_intrinsic_ballot: - result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0])); - break; - case nir_intrinsic_read_invocation: - case nir_intrinsic_read_first_invocation: { - LLVMValueRef args[2]; + get_deref_offset(ctx, instr->variables[0], + false, NULL, is_patch ? 
NULL : &vertex_index, + &const_index, &indir_index); - /* Value */ - args[0] = get_src(ctx, instr->src[0]); + LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa); + + LLVMTypeRef src_component_type; + if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind) + src_component_type = LLVMGetElementType(dest_type); + else + src_component_type = dest_type; + + result = ctx->abi->load_tess_varyings(ctx->abi, src_component_type, + vertex_index, indir_index, + const_index, location, driver_location, + instr->variables[0]->var->data.location_frac, + instr->num_components, + is_patch, is_compact, load_inputs); + return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, ""); +} + +static LLVMValueRef visit_load_var(struct ac_nir_context *ctx, + nir_intrinsic_instr *instr) +{ + LLVMValueRef values[8]; + int idx = instr->variables[0]->var->data.driver_location; + int ve = instr->dest.ssa.num_components; + unsigned comp = instr->variables[0]->var->data.location_frac; + LLVMValueRef indir_index; + LLVMValueRef ret; + unsigned const_index; + unsigned stride = instr->variables[0]->var->data.compact ? 1 : 4; + bool vs_in = ctx->stage == MESA_SHADER_VERTEX && + instr->variables[0]->var->data.mode == nir_var_shader_in; + get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL, + &const_index, &indir_index); - unsigned num_args; - const char *intr_name; - if (instr->intrinsic == nir_intrinsic_read_invocation) { - num_args = 2; - intr_name = "llvm.amdgcn.readlane"; + if (instr->dest.ssa.bit_size == 64) + ve *= 2; - /* Invocation */ - args[1] = get_src(ctx, instr->src[1]); - } else { - num_args = 1; - intr_name = "llvm.amdgcn.readfirstlane"; + switch (instr->variables[0]->var->data.mode) { + case nir_var_shader_in: + if (ctx->stage == MESA_SHADER_TESS_CTRL || + ctx->stage == MESA_SHADER_TESS_EVAL) { + return load_tess_varyings(ctx, instr, true); } - /* We currently have no other way to prevent LLVM from lifting the icmp - * calls to a dominating basic block. - */ - ac_build_optimization_barrier(&ctx->ac, &args[0]); - - result = ac_build_intrinsic(&ctx->ac, intr_name, - ctx->ac.i32, args, num_args, - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_CONVERGENT); - break; - } - case nir_intrinsic_load_subgroup_invocation: - result = ac_get_thread_id(&ctx->ac); - break; - case nir_intrinsic_load_work_group_id: { - LLVMValueRef values[3]; + if (ctx->stage == MESA_SHADER_GEOMETRY) { + LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size); + LLVMValueRef indir_index; + unsigned const_index, vertex_index; + get_deref_offset(ctx, instr->variables[0], + false, &vertex_index, NULL, + &const_index, &indir_index); - for (int i = 0; i < 3; i++) { - values[i] = ctx->abi->workgroup_ids[i] ? 
- ctx->abi->workgroup_ids[i] : ctx->ac.i32_0; + return ctx->abi->load_inputs(ctx->abi, instr->variables[0]->var->data.location, + instr->variables[0]->var->data.driver_location, + instr->variables[0]->var->data.location_frac, + instr->num_components, vertex_index, const_index, type); } - result = ac_build_gather_values(&ctx->ac, values, 3); - break; - } - case nir_intrinsic_load_base_vertex: { - result = ctx->abi->load_base_vertex(ctx->abi); - break; - } - case nir_intrinsic_load_local_group_size: - result = ctx->abi->load_local_group_size(ctx->abi); - break; - case nir_intrinsic_load_vertex_id: - result = LLVMBuildAdd(ctx->ac.builder, ctx->abi->vertex_id, - ctx->abi->base_vertex, ""); - break; - case nir_intrinsic_load_vertex_id_zero_base: { - result = ctx->abi->vertex_id; - break; - } - case nir_intrinsic_load_local_invocation_id: { - result = ctx->abi->local_invocation_ids; - break; - } - case nir_intrinsic_load_base_instance: - result = ctx->abi->start_instance; - break; - case nir_intrinsic_load_draw_id: - result = ctx->abi->draw_id; - break; - case nir_intrinsic_load_view_index: - result = ctx->abi->view_index; - break; - case nir_intrinsic_load_invocation_id: - if (ctx->stage == MESA_SHADER_TESS_CTRL) - result = ac_unpack_param(&ctx->ac, ctx->abi->tcs_rel_ids, 8, 5); - else - result = ctx->abi->gs_invocation_id; - break; - case nir_intrinsic_load_primitive_id: - if (ctx->stage == MESA_SHADER_GEOMETRY) { - result = ctx->abi->gs_prim_id; - } else if (ctx->stage == MESA_SHADER_TESS_CTRL) { - result = ctx->abi->tcs_patch_id; - } else if (ctx->stage == MESA_SHADER_TESS_EVAL) { - result = ctx->abi->tes_patch_id; - } else - fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage); - break; - case nir_intrinsic_load_sample_id: - result = ac_unpack_param(&ctx->ac, ctx->abi->ancillary, 8, 4); - break; - case nir_intrinsic_load_sample_pos: - result = load_sample_pos(ctx); - break; - case nir_intrinsic_load_sample_mask_in: - result = ctx->abi->load_sample_mask_in(ctx->abi); - break; - case nir_intrinsic_load_frag_coord: { - LLVMValueRef values[4] = { - ctx->abi->frag_pos[0], - ctx->abi->frag_pos[1], - ctx->abi->frag_pos[2], - ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ctx->abi->frag_pos[3]) - }; - result = ac_build_gather_values(&ctx->ac, values, 4); - break; - } - case nir_intrinsic_load_front_face: - result = ctx->abi->front_face; - break; - case nir_intrinsic_load_helper_invocation: - result = visit_load_helper_invocation(ctx); - break; - case nir_intrinsic_load_instance_id: - result = ctx->abi->instance_id; - break; - case nir_intrinsic_load_num_work_groups: - result = ctx->abi->num_work_groups; - break; - case nir_intrinsic_load_local_invocation_index: - result = visit_load_local_invocation_index(ctx); - break; - case nir_intrinsic_load_subgroup_id: - result = visit_load_subgroup_id(ctx); - break; - case nir_intrinsic_load_num_subgroups: - result = visit_load_num_subgroups(ctx); - break; - case nir_intrinsic_first_invocation: - result = visit_first_invocation(ctx); - break; - case nir_intrinsic_load_push_constant: - result = visit_load_push_constant(ctx, instr); - break; - case nir_intrinsic_vulkan_resource_index: { - LLVMValueRef index = get_src(ctx, instr->src[0]); - unsigned desc_set = nir_intrinsic_desc_set(instr); - unsigned binding = nir_intrinsic_binding(instr); + for (unsigned chan = comp; chan < ve + comp; chan++) { + if (indir_index) { + unsigned count = glsl_count_attribute_slots( + instr->variables[0]->var->type, + ctx->stage == MESA_SHADER_VERTEX); + count -= chan / 4; + 
LLVMValueRef tmp_vec = ac_build_gather_values_extended( + &ctx->ac, ctx->abi->inputs + idx + chan, count, + stride, false, true); - result = ctx->abi->load_resource(ctx->abi, index, desc_set, - binding); - break; - } - case nir_intrinsic_vulkan_resource_reindex: - result = visit_vulkan_resource_reindex(ctx, instr); - break; - case nir_intrinsic_store_ssbo: - visit_store_ssbo(ctx, instr); - break; - case nir_intrinsic_load_ssbo: - result = visit_load_buffer(ctx, instr); - break; - case nir_intrinsic_ssbo_atomic_add: - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_ssbo_atomic_umin: - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_ssbo_atomic_umax: - case nir_intrinsic_ssbo_atomic_and: - case nir_intrinsic_ssbo_atomic_or: - case nir_intrinsic_ssbo_atomic_xor: - case nir_intrinsic_ssbo_atomic_exchange: - case nir_intrinsic_ssbo_atomic_comp_swap: - result = visit_atomic_ssbo(ctx, instr); - break; - case nir_intrinsic_load_ubo: - result = visit_load_ubo_buffer(ctx, instr); - break; - case nir_intrinsic_get_buffer_size: - result = visit_get_buffer_size(ctx, instr); - break; - case nir_intrinsic_load_var: - result = visit_load_var(ctx, instr); - break; - case nir_intrinsic_store_var: - visit_store_var(ctx, instr); - break; - case nir_intrinsic_load_shared: - result = visit_load_shared(ctx, instr); - break; - case nir_intrinsic_store_shared: - visit_store_shared(ctx, instr); - break; - case nir_intrinsic_image_samples: - result = visit_image_samples(ctx, instr); - break; - case nir_intrinsic_image_load: - result = visit_image_load(ctx, instr); - break; - case nir_intrinsic_image_store: - visit_image_store(ctx, instr); - break; - case nir_intrinsic_image_atomic_add: - case nir_intrinsic_image_atomic_min: - case nir_intrinsic_image_atomic_max: - case nir_intrinsic_image_atomic_and: - case nir_intrinsic_image_atomic_or: - case nir_intrinsic_image_atomic_xor: - case nir_intrinsic_image_atomic_exchange: - case nir_intrinsic_image_atomic_comp_swap: - result = visit_image_atomic(ctx, instr); - break; - case nir_intrinsic_image_size: - result = visit_image_size(ctx, instr); - break; - case nir_intrinsic_shader_clock: - result = ac_build_shader_clock(&ctx->ac); - break; - case nir_intrinsic_discard: - case nir_intrinsic_discard_if: - emit_discard(ctx, instr); - break; - case nir_intrinsic_memory_barrier: - case nir_intrinsic_group_memory_barrier: - case nir_intrinsic_memory_barrier_atomic_counter: - case nir_intrinsic_memory_barrier_buffer: - case nir_intrinsic_memory_barrier_image: - case nir_intrinsic_memory_barrier_shared: - emit_membar(&ctx->ac, instr); - break; - case nir_intrinsic_barrier: - ac_emit_barrier(&ctx->ac, ctx->stage); - break; - case nir_intrinsic_shared_atomic_add: - case nir_intrinsic_shared_atomic_imin: - case nir_intrinsic_shared_atomic_umin: - case nir_intrinsic_shared_atomic_imax: - case nir_intrinsic_shared_atomic_umax: - case nir_intrinsic_shared_atomic_and: - case nir_intrinsic_shared_atomic_or: - case nir_intrinsic_shared_atomic_xor: - case nir_intrinsic_shared_atomic_exchange: - case nir_intrinsic_shared_atomic_comp_swap: { - LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0]); - result = visit_var_atomic(ctx, instr, ptr, 1); + values[chan] = LLVMBuildExtractElement(ctx->ac.builder, + tmp_vec, + indir_index, ""); + } else + values[chan] = ctx->abi->inputs[idx + chan + const_index * stride]; + } break; - } - case nir_intrinsic_var_atomic_add: - case nir_intrinsic_var_atomic_imin: - case nir_intrinsic_var_atomic_umin: - case nir_intrinsic_var_atomic_imax: 
- case nir_intrinsic_var_atomic_umax:
- case nir_intrinsic_var_atomic_and:
- case nir_intrinsic_var_atomic_or:
- case nir_intrinsic_var_atomic_xor:
- case nir_intrinsic_var_atomic_exchange:
- case nir_intrinsic_var_atomic_comp_swap: {
- LLVMValueRef ptr = build_gep_for_deref(ctx, instr->variables[0]);
- result = visit_var_atomic(ctx, instr, ptr, 0);
+ case nir_var_local:
+ for (unsigned chan = 0; chan < ve; chan++) {
+ if (indir_index) {
+ unsigned count = glsl_count_attribute_slots(
+ instr->variables[0]->var->type, false);
+ count -= chan / 4;
+ LLVMValueRef tmp_vec = ac_build_gather_values_extended(
+ &ctx->ac, ctx->locals + idx + chan, count,
+ stride, true, true);
+
+ values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
+ tmp_vec,
+ indir_index, "");
+ } else {
+ values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], "");
+ }
+ }
 break;
+ case nir_var_shared: {
+ LLVMValueRef address = build_gep_for_deref(ctx,
+ instr->variables[0]);
+ LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
+ return LLVMBuildBitCast(ctx->ac.builder, val,
+ get_def_type(ctx, &instr->dest.ssa),
+ "");
 }
- case nir_intrinsic_interp_var_at_centroid:
- case nir_intrinsic_interp_var_at_sample:
- case nir_intrinsic_interp_var_at_offset:
- result = visit_interp(ctx, instr);
- break;
- case nir_intrinsic_emit_vertex:
- ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
- break;
- case nir_intrinsic_end_primitive:
- ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
- break;
- case nir_intrinsic_load_tess_coord:
- result = ctx->abi->load_tess_coord(ctx->abi);
- break;
- case nir_intrinsic_load_tess_level_outer:
- result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER);
- break;
- case nir_intrinsic_load_tess_level_inner:
- result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER);
+ case nir_var_shader_out:
+ if (ctx->stage == MESA_SHADER_TESS_CTRL) {
+ return load_tess_varyings(ctx, instr, false);
+ }
+
+ for (unsigned chan = comp; chan < ve + comp; chan++) {
+ if (indir_index) {
+ unsigned count = glsl_count_attribute_slots(
+ instr->variables[0]->var->type, false);
+ count -= chan / 4;
+ LLVMValueRef tmp_vec = ac_build_gather_values_extended(
+ &ctx->ac, ctx->abi->outputs + idx + chan, count,
+ stride, true, true);
+
+ values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
+ tmp_vec,
+ indir_index, "");
+ } else {
+ values[chan] = LLVMBuildLoad(ctx->ac.builder,
+ ctx->abi->outputs[idx + chan + const_index * stride],
+ "");
+ }
+ }
 break;
- case nir_intrinsic_load_patch_vertices_in:
- result = ctx->abi->load_patch_vertices_in(ctx->abi);
+ default:
+ unreachable("unhandled variable mode");
+ }
+ ret = ac_build_varying_gather_values(&ctx->ac, values, ve, comp);
+ return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
+}
+
+static void
+visit_store_var(struct ac_nir_context *ctx,
+ nir_intrinsic_instr *instr)
+{
+ LLVMValueRef temp_ptr, value;
+ int idx = instr->variables[0]->var->data.driver_location;
+ unsigned comp = instr->variables[0]->var->data.location_frac;
+ LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
+ int writemask = instr->const_index[0] << comp;
+ LLVMValueRef indir_index;
+ unsigned const_index;
+ get_deref_offset(ctx, instr->variables[0], false,
+ NULL, NULL, &const_index, &indir_index);
+
+ if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {
+
+ src = LLVMBuildBitCast(ctx->ac.builder, src,
+ 
LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2), + ""); + + writemask = widen_mask(writemask, 2); + } + + switch (instr->variables[0]->var->data.mode) { + case nir_var_shader_out: + + if (ctx->stage == MESA_SHADER_TESS_CTRL) { + LLVMValueRef vertex_index = NULL; + LLVMValueRef indir_index = NULL; + unsigned const_index = 0; + const unsigned location = instr->variables[0]->var->data.location; + const unsigned driver_location = instr->variables[0]->var->data.driver_location; + const unsigned comp = instr->variables[0]->var->data.location_frac; + const bool is_patch = instr->variables[0]->var->data.patch; + const bool is_compact = instr->variables[0]->var->data.compact; + + get_deref_offset(ctx, instr->variables[0], + false, NULL, is_patch ? NULL : &vertex_index, + &const_index, &indir_index); + + ctx->abi->store_tcs_outputs(ctx->abi, vertex_index, indir_index, + const_index, location, driver_location, + src, comp, is_patch, is_compact, writemask); + return; + } + + for (unsigned chan = 0; chan < 8; chan++) { + int stride = 4; + if (!(writemask & (1 << chan))) + continue; + + value = ac_llvm_extract_elem(&ctx->ac, src, chan - comp); + + if (instr->variables[0]->var->data.compact) + stride = 1; + if (indir_index) { + unsigned count = glsl_count_attribute_slots( + instr->variables[0]->var->type, false); + count -= chan / 4; + LLVMValueRef tmp_vec = ac_build_gather_values_extended( + &ctx->ac, ctx->abi->outputs + idx + chan, count, + stride, true, true); + + tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec, + value, indir_index, ""); + build_store_values_extended(&ctx->ac, ctx->abi->outputs + idx + chan, + count, stride, tmp_vec); + + } else { + temp_ptr = ctx->abi->outputs[idx + chan + const_index * stride]; + + LLVMBuildStore(ctx->ac.builder, value, temp_ptr); + } + } break; - case nir_intrinsic_vote_all: { - LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0])); - result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, ""); + case nir_var_local: + for (unsigned chan = 0; chan < 8; chan++) { + if (!(writemask & (1 << chan))) + continue; + + value = ac_llvm_extract_elem(&ctx->ac, src, chan); + if (indir_index) { + unsigned count = glsl_count_attribute_slots( + instr->variables[0]->var->type, false); + count -= chan / 4; + LLVMValueRef tmp_vec = ac_build_gather_values_extended( + &ctx->ac, ctx->locals + idx + chan, count, + 4, true, true); + + tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec, + value, indir_index, ""); + build_store_values_extended(&ctx->ac, ctx->locals + idx + chan, + count, 4, tmp_vec); + } else { + temp_ptr = ctx->locals[idx + chan + const_index * 4]; + + LLVMBuildStore(ctx->ac.builder, value, temp_ptr); + } + } break; - } - case nir_intrinsic_vote_any: { - LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0])); - result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, ""); + case nir_var_shared: { + int writemask = instr->const_index[0]; + LLVMValueRef address = build_gep_for_deref(ctx, + instr->variables[0]); + LLVMValueRef val = get_src(ctx, instr->src[0]); + unsigned components = + glsl_get_vector_elements( + nir_deref_tail(&instr->variables[0]->deref)->type); + if (writemask == (1 << components) - 1) { + val = LLVMBuildBitCast( + ctx->ac.builder, val, + LLVMGetElementType(LLVMTypeOf(address)), ""); + LLVMBuildStore(ctx->ac.builder, val, address); + } else { + for (unsigned chan = 0; chan < 4; chan++) { + if (!(writemask & (1 << chan))) + continue; + LLVMValueRef ptr = + 
LLVMBuildStructGEP(ctx->ac.builder, + address, chan, ""); + LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val, + chan); + src = LLVMBuildBitCast( + ctx->ac.builder, src, + LLVMGetElementType(LLVMTypeOf(ptr)), ""); + LLVMBuildStore(ctx->ac.builder, src, ptr); + } + } break; } default: - fprintf(stderr, "Unknown intrinsic: "); - nir_print_instr(&instr->instr, stderr); - fprintf(stderr, "\n"); break; } - if (result) { - _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result); - } } -static LLVMValueRef radv_load_base_vertex(struct ac_shader_abi *abi) -{ - return abi->base_vertex; -} - -static LLVMValueRef radv_load_ssbo(struct ac_shader_abi *abi, - LLVMValueRef buffer_ptr, bool write) +static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array) { - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - LLVMValueRef result; - - LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md); - - result = LLVMBuildLoad(ctx->ac.builder, buffer_ptr, ""); - LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md); - - return result; + switch (dim) { + case GLSL_SAMPLER_DIM_BUF: + return 1; + case GLSL_SAMPLER_DIM_1D: + return array ? 2 : 1; + case GLSL_SAMPLER_DIM_2D: + return array ? 3 : 2; + case GLSL_SAMPLER_DIM_MS: + return array ? 4 : 3; + case GLSL_SAMPLER_DIM_3D: + case GLSL_SAMPLER_DIM_CUBE: + return 3; + case GLSL_SAMPLER_DIM_RECT: + case GLSL_SAMPLER_DIM_SUBPASS: + return 2; + case GLSL_SAMPLER_DIM_SUBPASS_MS: + return 3; + default: + break; + } + return 0; } -static LLVMValueRef radv_load_ubo(struct ac_shader_abi *abi, LLVMValueRef buffer_ptr) +static bool +glsl_is_array_image(const struct glsl_type *type) { - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - LLVMValueRef result; - - LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md); + const enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); - result = LLVMBuildLoad(ctx->ac.builder, buffer_ptr, ""); - LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md); + if (glsl_sampler_type_is_array(type)) + return true; - return result; + return dim == GLSL_SAMPLER_DIM_CUBE || + dim == GLSL_SAMPLER_DIM_3D || + dim == GLSL_SAMPLER_DIM_SUBPASS || + dim == GLSL_SAMPLER_DIM_SUBPASS_MS; } -static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi, - unsigned descriptor_set, - unsigned base_index, - unsigned constant_index, - LLVMValueRef index, - enum ac_descriptor_type desc_type, - bool image, bool write) -{ - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - LLVMValueRef list = ctx->descriptor_sets[descriptor_set]; - struct radv_descriptor_set_layout *layout = ctx->options->layout->set[descriptor_set].layout; - struct radv_descriptor_set_binding_layout *binding = layout->binding + base_index; - unsigned offset = binding->offset; - unsigned stride = binding->size; - unsigned type_size; - LLVMBuilderRef builder = ctx->ac.builder; - LLVMTypeRef type; - - assert(base_index < layout->binding_count); - - switch (desc_type) { - case AC_DESC_IMAGE: - type = ctx->ac.v8i32; - type_size = 32; - break; - case AC_DESC_FMASK: - type = ctx->ac.v8i32; - offset += 32; - type_size = 32; - break; - case AC_DESC_SAMPLER: - type = ctx->ac.v4i32; - if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) - offset += 64; - - type_size = 16; - break; - case AC_DESC_BUFFER: - type = ctx->ac.v4i32; - type_size = 16; - break; - default: - unreachable("invalid desc_type\n"); - } - - offset += 
constant_index * stride; - if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset && - (!index || binding->immutable_samplers_equal)) { - if (binding->immutable_samplers_equal) - constant_index = 0; - - const uint32_t *samplers = radv_immutable_samplers(layout, binding); +/* Adjust the sample index according to FMASK. + * + * For uncompressed MSAA surfaces, FMASK should return 0x76543210, + * which is the identity mapping. Each nibble says which physical sample + * should be fetched to get that sample. + * + * For example, 0x11111100 means there are only 2 samples stored and + * the second sample covers 3/4 of the pixel. When reading samples 0 + * and 1, return physical sample 0 (determined by the first two 0s + * in FMASK), otherwise return physical sample 1. + * + * The sample index should be adjusted as follows: + * sample_index = (fmask >> (sample_index * 4)) & 0xF; + */ +static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx, + LLVMValueRef coord_x, LLVMValueRef coord_y, + LLVMValueRef coord_z, + LLVMValueRef sample_index, + LLVMValueRef fmask_desc_ptr) +{ + LLVMValueRef fmask_load_address[4]; + LLVMValueRef res; - LLVMValueRef constants[] = { - LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 0], 0), - LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 1], 0), - LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 2], 0), - LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 3], 0), - }; - return ac_build_gather_values(&ctx->ac, constants, 4); + fmask_load_address[0] = coord_x; + fmask_load_address[1] = coord_y; + if (coord_z) { + fmask_load_address[2] = coord_z; + fmask_load_address[3] = LLVMGetUndef(ctx->i32); } - assert(stride % type_size == 0); - - if (!index) - index = ctx->ac.i32_0; - - index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), ""); - - list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, offset, 0)); - list = LLVMBuildPointerCast(builder, list, ac_array_in_const_addr_space(type), ""); - - return ac_build_load_to_sgpr(&ctx->ac, list, index); -} - -static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, - const nir_deref_var *deref, - enum ac_descriptor_type desc_type, - const nir_tex_instr *tex_instr, - bool image, bool write) -{ - LLVMValueRef index = NULL; - unsigned constant_index = 0; - unsigned descriptor_set; - unsigned base_index; + struct ac_image_args args = {0}; - if (!deref) { - assert(tex_instr && !image); - descriptor_set = 0; - base_index = tex_instr->sampler_index; - } else { - const nir_deref *tail = &deref->deref; - while (tail->child) { - const nir_deref_array *child = nir_deref_as_array(tail->child); - unsigned array_size = glsl_get_aoa_size(tail->child->type); + args.opcode = ac_image_load; + args.da = coord_z ? true : false; + args.resource = fmask_desc_ptr; + args.dmask = 0xf; + args.addr = ac_build_gather_values(ctx, fmask_load_address, coord_z ? 
4 : 2); - if (!array_size) - array_size = 1; + res = ac_build_image_opcode(ctx, &args); - assert(child->deref_array_type != nir_deref_array_type_wildcard); + res = ac_to_integer(ctx, res); + LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false); + LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false); - if (child->deref_array_type == nir_deref_array_type_indirect) { - LLVMValueRef indirect = get_src(ctx, child->indirect); + LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder, + res, + ctx->i32_0, ""); - indirect = LLVMBuildMul(ctx->ac.builder, indirect, - LLVMConstInt(ctx->ac.i32, array_size, false), ""); + LLVMValueRef sample_index4 = + LLVMBuildMul(ctx->builder, sample_index, four, ""); + LLVMValueRef shifted_fmask = + LLVMBuildLShr(ctx->builder, fmask, sample_index4, ""); + LLVMValueRef final_sample = + LLVMBuildAnd(ctx->builder, shifted_fmask, F, ""); - if (!index) - index = indirect; - else - index = LLVMBuildAdd(ctx->ac.builder, index, indirect, ""); - } + /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK + * resource descriptor is 0 (invalid), + */ + LLVMValueRef fmask_desc = + LLVMBuildBitCast(ctx->builder, fmask_desc_ptr, + ctx->v8i32, ""); - constant_index += child->base_offset * array_size; + LLVMValueRef fmask_word1 = + LLVMBuildExtractElement(ctx->builder, fmask_desc, + ctx->i32_1, ""); - tail = &child->deref; - } - descriptor_set = deref->var->data.descriptor_set; - base_index = deref->var->data.binding; - } + LLVMValueRef word1_is_nonzero = + LLVMBuildICmp(ctx->builder, LLVMIntNE, + fmask_word1, ctx->i32_0, ""); - return ctx->abi->load_sampler_desc(ctx->abi, - descriptor_set, - base_index, - constant_index, index, - desc_type, image, write); + /* Replace the MSAA sample index. */ + sample_index = + LLVMBuildSelect(ctx->builder, word1_is_nonzero, + final_sample, sample_index, ""); + return sample_index; } -static void set_tex_fetch_args(struct ac_llvm_context *ctx, - struct ac_image_args *args, - const nir_tex_instr *instr, - nir_texop op, - LLVMValueRef res_ptr, LLVMValueRef samp_ptr, - LLVMValueRef *param, unsigned count, - unsigned dmask) +static LLVMValueRef get_image_coords(struct ac_nir_context *ctx, + const nir_intrinsic_instr *instr) { - unsigned is_rect = 0; - bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE; + const struct glsl_type *type = glsl_without_array(instr->variables[0]->var->type); - if (op == nir_texop_lod) - da = false; - /* Pad to power of two vector */ - while (count < util_next_power_of_two(count)) - param[count++] = LLVMGetUndef(ctx->i32); + LLVMValueRef src0 = get_src(ctx, instr->src[0]); + LLVMValueRef coords[4]; + LLVMValueRef masks[] = { + LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false), + LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false), + }; + LLVMValueRef res; + LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[1]), 0); - if (count > 1) - args->addr = ac_build_gather_values(ctx, param, count); - else - args->addr = param[0]; + int count; + enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); + bool is_array = glsl_sampler_type_is_array(type); + bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS || + dim == GLSL_SAMPLER_DIM_SUBPASS_MS); + bool is_ms = (dim == GLSL_SAMPLER_DIM_MS || + dim == GLSL_SAMPLER_DIM_SUBPASS_MS); + bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D; + count = image_type_to_components_count(dim, is_array); - args->resource = res_ptr; - args->sampler = samp_ptr; + if 
(is_ms) { + LLVMValueRef fmask_load_address[3]; + int chan; - if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) { - args->addr = param[0]; - return; + fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], ""); + fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], ""); + if (is_array) + fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], ""); + else + fmask_load_address[2] = NULL; + if (add_frag_pos) { + for (chan = 0; chan < 2; ++chan) + fmask_load_address[chan] = + LLVMBuildAdd(ctx->ac.builder, fmask_load_address[chan], + LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan], + ctx->ac.i32, ""), ""); + fmask_load_address[2] = ac_to_integer(&ctx->ac, ctx->abi->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]); + } + sample_index = adjust_sample_index_using_fmask(&ctx->ac, + fmask_load_address[0], + fmask_load_address[1], + fmask_load_address[2], + sample_index, + get_sampler_desc(ctx, instr->variables[0], AC_DESC_FMASK, NULL, true, false)); } + if (count == 1 && !gfx9_1d) { + if (instr->src[0].ssa->num_components) + res = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], ""); + else + res = src0; + } else { + int chan; + if (is_ms) + count--; + for (chan = 0; chan < count; ++chan) { + coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan); + } + if (add_frag_pos) { + for (chan = 0; chan < 2; ++chan) + coords[chan] = LLVMBuildAdd(ctx->ac.builder, coords[chan], LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan], + ctx->ac.i32, ""), ""); + coords[2] = ac_to_integer(&ctx->ac, ctx->abi->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]); + count++; + } - args->dmask = dmask; - args->unorm = is_rect; - args->da = da; -} - -/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL. - * - * SI-CI: - * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic - * filtering manually. The driver sets img7 to a mask clearing - * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do: - * s_and_b32 samp0, samp0, img7 - * - * VI: - * The ANISO_OVERRIDE sampler field enables this fix in TA. 
- */ -static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx, - LLVMValueRef res, LLVMValueRef samp) -{ - LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef img7, samp0; - - if (ctx->ac.chip_class >= VI) - return samp; + if (gfx9_1d) { + if (is_array) { + coords[2] = coords[1]; + coords[1] = ctx->ac.i32_0; + } else + coords[1] = ctx->ac.i32_0; + count++; + } - img7 = LLVMBuildExtractElement(builder, res, - LLVMConstInt(ctx->ac.i32, 7, 0), ""); - samp0 = LLVMBuildExtractElement(builder, samp, - LLVMConstInt(ctx->ac.i32, 0, 0), ""); - samp0 = LLVMBuildAnd(builder, samp0, img7, ""); - return LLVMBuildInsertElement(builder, samp, samp0, - LLVMConstInt(ctx->ac.i32, 0, 0), ""); -} + if (is_ms) { + coords[count] = sample_index; + count++; + } -static void tex_fetch_ptrs(struct ac_nir_context *ctx, - nir_tex_instr *instr, - LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, - LLVMValueRef *fmask_ptr) -{ - if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) - *res_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_BUFFER, instr, false, false); - else - *res_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_IMAGE, instr, false, false); - if (samp_ptr) { - if (instr->sampler) - *samp_ptr = get_sampler_desc(ctx, instr->sampler, AC_DESC_SAMPLER, instr, false, false); - else - *samp_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_SAMPLER, instr, false, false); - if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT) - *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr); + if (count == 3) { + coords[3] = LLVMGetUndef(ctx->ac.i32); + count = 4; + } + res = ac_build_gather_values(&ctx->ac, coords, count); } - if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms || - instr->op == nir_texop_samples_identical)) - *fmask_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_FMASK, instr, false, false); + return res; } -static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx, - LLVMValueRef coord) +static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, + const nir_intrinsic_instr *instr) { - coord = ac_to_float(ctx, coord); - coord = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &coord, 1, 0); - coord = ac_to_integer(ctx, coord); - return coord; -} + LLVMValueRef params[7]; + LLVMValueRef res; + char intrinsic_name[64]; + const nir_variable *var = instr->variables[0]->var; + const struct glsl_type *type = var->type; -static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) -{ - LLVMValueRef result = NULL; - struct ac_image_args args = { 0 }; - unsigned dmask = 0xf; - LLVMValueRef address[16]; - LLVMValueRef coords[5]; - LLVMValueRef coord = NULL, lod = NULL, comparator = NULL; - LLVMValueRef bias = NULL, offsets = NULL; - LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL; - LLVMValueRef ddx = NULL, ddy = NULL; - LLVMValueRef derivs[6]; - unsigned chan, count = 0; - unsigned const_src = 0, num_deriv_comp = 0; - bool lod_is_zero = false; + if(instr->variables[0]->deref.child) + type = instr->variables[0]->deref.child->type; - tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr); + type = glsl_without_array(type); - for (unsigned i = 0; i < instr->num_srcs; i++) { - switch (instr->src[i].src_type) { - case nir_tex_src_coord: - coord = get_src(ctx, instr->src[i].src); - break; - case nir_tex_src_projector: - break; - case nir_tex_src_comparator: - comparator = get_src(ctx, instr->src[i].src); - break; - case nir_tex_src_offset: - offsets = get_src(ctx, instr->src[i].src); - const_src = i; - break; - case 
nir_tex_src_bias: - bias = get_src(ctx, instr->src[i].src); - break; - case nir_tex_src_lod: { - nir_const_value *val = nir_src_as_const_value(instr->src[i].src); + const enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); + if (dim == GLSL_SAMPLER_DIM_BUF) { + unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa); + unsigned num_channels = util_last_bit(mask); + LLVMValueRef rsrc, vindex; - if (val && val->i32[0] == 0) - lod_is_zero = true; - lod = get_src(ctx, instr->src[i].src); - break; - } - case nir_tex_src_ms_index: - sample_index = get_src(ctx, instr->src[i].src); - break; - case nir_tex_src_ms_mcs: - break; - case nir_tex_src_ddx: - ddx = get_src(ctx, instr->src[i].src); - num_deriv_comp = instr->src[i].src.ssa->num_components; - break; - case nir_tex_src_ddy: - ddy = get_src(ctx, instr->src[i].src); - break; - case nir_tex_src_texture_offset: - case nir_tex_src_sampler_offset: - case nir_tex_src_plane: - default: - break; - } - } + rsrc = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, false); + vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]), + ctx->ac.i32_0, ""); - if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { - result = get_buffer_size(ctx, res_ptr, true); - goto write_result; - } + /* TODO: set "glc" and "can_speculate" when OpenGL needs it. */ + res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex, + ctx->ac.i32_0, num_channels, + false, false); + res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels); - if (instr->op == nir_texop_texture_samples) { - LLVMValueRef res, samples, is_msaa; - res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->ac.v8i32, ""); - samples = LLVMBuildExtractElement(ctx->ac.builder, res, - LLVMConstInt(ctx->ac.i32, 3, false), ""); - is_msaa = LLVMBuildLShr(ctx->ac.builder, samples, - LLVMConstInt(ctx->ac.i32, 28, false), ""); - is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa, - LLVMConstInt(ctx->ac.i32, 0xe, false), ""); - is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa, - LLVMConstInt(ctx->ac.i32, 0xe, false), ""); + res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components); + res = ac_to_integer(&ctx->ac, res); + } else { + LLVMValueRef da = glsl_is_array_image(type) ? ctx->ac.i1true : ctx->ac.i1false; + LLVMValueRef slc = ctx->ac.i1false; - samples = LLVMBuildLShr(ctx->ac.builder, samples, - LLVMConstInt(ctx->ac.i32, 16, false), ""); - samples = LLVMBuildAnd(ctx->ac.builder, samples, - LLVMConstInt(ctx->ac.i32, 0xf, false), ""); - samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1, - samples, ""); - samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples, - ctx->ac.i32_1, ""); - result = samples; - goto write_result; + params[0] = get_image_coords(ctx, instr); + params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false); + params[2] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */ + params[3] = (var->data.image._volatile || var->data.image.coherent) ? 
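+ /* glc: volatile/coherent images need globally coherent
+ * (L1-bypassing) loads. */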
+ ctx->ac.i1true : ctx->ac.i1false; + params[4] = slc; + params[5] = ctx->ac.i1false; + params[6] = da; + + ac_get_image_intr_name("llvm.amdgcn.image.load", + ctx->ac.v4f32, /* vdata */ + LLVMTypeOf(params[0]), /* coords */ + LLVMTypeOf(params[1]), /* rsrc */ + intrinsic_name, sizeof(intrinsic_name)); + + res = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.v4f32, + params, 7, AC_FUNC_ATTR_READONLY); } + return ac_to_integer(&ctx->ac, res); +} - if (coord) - for (chan = 0; chan < instr->coord_components; chan++) - coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan); +static void visit_image_store(struct ac_nir_context *ctx, + nir_intrinsic_instr *instr) +{ + LLVMValueRef params[8]; + char intrinsic_name[64]; + const nir_variable *var = instr->variables[0]->var; + const struct glsl_type *type = glsl_without_array(var->type); + const enum glsl_sampler_dim dim = glsl_get_sampler_dim(type); + LLVMValueRef glc = ctx->ac.i1false; + bool force_glc = ctx->ac.chip_class == SI; + if (force_glc) + glc = ctx->ac.i1true; - if (offsets && instr->op != nir_texop_txf) { - LLVMValueRef offset[3], pack; - for (chan = 0; chan < 3; ++chan) - offset[chan] = ctx->ac.i32_0; + if (dim == GLSL_SAMPLER_DIM_BUF) { + params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */ + params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, true); + params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]), + ctx->ac.i32_0, ""); /* vindex */ + params[3] = ctx->ac.i32_0; /* voffset */ + params[4] = glc; /* glc */ + params[5] = ctx->ac.i1false; /* slc */ + ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt, + params, 6, 0); + } else { + LLVMValueRef da = glsl_is_array_image(type) ? ctx->ac.i1true : ctx->ac.i1false; + LLVMValueRef slc = ctx->ac.i1false; - args.offset = true; - for (chan = 0; chan < ac_get_llvm_num_components(offsets); chan++) { - offset[chan] = ac_llvm_extract_elem(&ctx->ac, offsets, chan); - offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan], - LLVMConstInt(ctx->ac.i32, 0x3f, false), ""); - if (chan) - offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan], - LLVMConstInt(ctx->ac.i32, chan * 8, false), ""); - } - pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], ""); - pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], ""); - address[count++] = pack; + params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); + params[1] = get_image_coords(ctx, instr); /* coords */ + params[2] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, true); + params[3] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */ + params[4] = (force_glc || var->data.image._volatile || var->data.image.coherent) ? 
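+ /* glc: globally coherent stores; SI additionally forces
+ * glc for all image stores (force_glc above). */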
+ ctx->ac.i1true : ctx->ac.i1false; + params[5] = slc; + params[6] = ctx->ac.i1false; + params[7] = da; - } - /* pack LOD bias value */ - if (instr->op == nir_texop_txb && bias) { - address[count++] = bias; + ac_get_image_intr_name("llvm.amdgcn.image.store", + LLVMTypeOf(params[0]), /* vdata */ + LLVMTypeOf(params[1]), /* coords */ + LLVMTypeOf(params[2]), /* rsrc */ + intrinsic_name, sizeof(intrinsic_name)); + + ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.voidt, + params, 8, 0); } - /* Pack depth comparison value */ - if (instr->is_shadow && comparator) { - LLVMValueRef z = ac_to_float(&ctx->ac, - ac_llvm_extract_elem(&ctx->ac, comparator, 0)); +} + +static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, + const nir_intrinsic_instr *instr) +{ + LLVMValueRef params[7]; + int param_count = 0; + const nir_variable *var = instr->variables[0]->var; + + const char *atomic_name; + char intrinsic_name[41]; + const struct glsl_type *type = glsl_without_array(var->type); + MAYBE_UNUSED int length; - /* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT, - * so the depth comparison value isn't clamped for Z16 and - * Z24 anymore. Do it manually here. - * - * It's unnecessary if the original texture format was - * Z32_FLOAT, but we don't know that here. - */ - if (ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference) - z = ac_build_clamp(&ctx->ac, z); + bool is_unsigned = glsl_get_sampler_result_type(type) == GLSL_TYPE_UINT; - address[count++] = z; + switch (instr->intrinsic) { + case nir_intrinsic_image_atomic_add: + atomic_name = "add"; + break; + case nir_intrinsic_image_atomic_min: + atomic_name = is_unsigned ? "umin" : "smin"; + break; + case nir_intrinsic_image_atomic_max: + atomic_name = is_unsigned ? "umax" : "smax"; + break; + case nir_intrinsic_image_atomic_and: + atomic_name = "and"; + break; + case nir_intrinsic_image_atomic_or: + atomic_name = "or"; + break; + case nir_intrinsic_image_atomic_xor: + atomic_name = "xor"; + break; + case nir_intrinsic_image_atomic_exchange: + atomic_name = "swap"; + break; + case nir_intrinsic_image_atomic_comp_swap: + atomic_name = "cmpswap"; + break; + default: + abort(); } - /* pack derivatives */ - if (ddx || ddy) { - int num_src_deriv_channels, num_dest_deriv_channels; - switch (instr->sampler_dim) { - case GLSL_SAMPLER_DIM_3D: - case GLSL_SAMPLER_DIM_CUBE: - num_deriv_comp = 3; - num_src_deriv_channels = 3; - num_dest_deriv_channels = 3; - break; - case GLSL_SAMPLER_DIM_2D: - default: - num_src_deriv_channels = 2; - num_dest_deriv_channels = 2; - num_deriv_comp = 2; - break; - case GLSL_SAMPLER_DIM_1D: - num_src_deriv_channels = 1; - if (ctx->ac.chip_class >= GFX9) { - num_dest_deriv_channels = 2; - num_deriv_comp = 2; - } else { - num_dest_deriv_channels = 1; - num_deriv_comp = 1; - } - break; - } + if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap) + params[param_count++] = get_src(ctx, instr->src[3]); + params[param_count++] = get_src(ctx, instr->src[2]); - for (unsigned i = 0; i < num_src_deriv_channels; i++) { - derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i)); - derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i)); - } - for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) { - derivs[i] = ctx->ac.f32_0; - derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0; - } - } + if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) { + params[param_count++] = get_sampler_desc(ctx, instr->variables[0], 
AC_DESC_BUFFER, + NULL, true, true); + params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]), + ctx->ac.i32_0, ""); /* vindex */ + params[param_count++] = ctx->ac.i32_0; /* voffset */ + params[param_count++] = ctx->ac.i1false; /* slc */ - if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) { - for (chan = 0; chan < instr->coord_components; chan++) - coords[chan] = ac_to_float(&ctx->ac, coords[chan]); - if (instr->coord_components == 3) - coords[3] = LLVMGetUndef(ctx->ac.f32); - ac_prepare_cube_coords(&ctx->ac, - instr->op == nir_texop_txd, instr->is_array, - instr->op == nir_texop_lod, coords, derivs); - if (num_deriv_comp) - num_deriv_comp--; - } + length = snprintf(intrinsic_name, sizeof(intrinsic_name), + "llvm.amdgcn.buffer.atomic.%s", atomic_name); + } else { + char coords_type[8]; - if (ddx || ddy) { - for (unsigned i = 0; i < num_deriv_comp * 2; i++) - address[count++] = derivs[i]; + LLVMValueRef coords = params[param_count++] = get_image_coords(ctx, instr); + params[param_count++] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, + NULL, true, true); + params[param_count++] = ctx->ac.i1false; /* r128 */ + params[param_count++] = glsl_is_array_image(type) ? ctx->ac.i1true : ctx->ac.i1false; /* da */ + params[param_count++] = ctx->ac.i1false; /* slc */ + + build_int_type_name(LLVMTypeOf(coords), + coords_type, sizeof(coords_type)); + + length = snprintf(intrinsic_name, sizeof(intrinsic_name), + "llvm.amdgcn.image.atomic.%s.%s", atomic_name, coords_type); } - /* Pack texture coordinates */ - if (coord) { - address[count++] = coords[0]; - if (instr->coord_components > 1) { - if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) { - coords[1] = apply_round_slice(&ctx->ac, coords[1]); - } - address[count++] = coords[1]; - } - if (instr->coord_components > 2) { - if ((instr->sampler_dim == GLSL_SAMPLER_DIM_2D || - instr->sampler_dim == GLSL_SAMPLER_DIM_MS || - instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS || - instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) && - instr->is_array && - instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) { - coords[2] = apply_round_slice(&ctx->ac, coords[2]); - } - address[count++] = coords[2]; - } + assert(length < sizeof(intrinsic_name)); + return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32, params, param_count, 0); +} - if (ctx->ac.chip_class >= GFX9) { - LLVMValueRef filler; - if (instr->op == nir_texop_txf) - filler = ctx->ac.i32_0; - else - filler = LLVMConstReal(ctx->ac.f32, 0.5); +static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx, + const nir_intrinsic_instr *instr) +{ + const nir_variable *var = instr->variables[0]->var; + const struct glsl_type *type = glsl_without_array(var->type); - if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D) { - /* No nir_texop_lod, because it does not take a slice - * even with array textures. 
*/ - if (instr->is_array && instr->op != nir_texop_lod ) { - address[count] = address[count - 1]; - address[count - 1] = filler; - count++; - } else - address[count++] = filler; - } - } - } + struct ac_image_args args = { 0 }; + args.da = glsl_is_array_image(type); + args.dmask = 0xf; + args.resource = get_sampler_desc(ctx, instr->variables[0], + AC_DESC_IMAGE, NULL, true, false); + args.opcode = ac_image_get_resinfo; + args.addr = ctx->ac.i32_0; - /* Pack LOD */ - if (lod && ((instr->op == nir_texop_txl || instr->op == nir_texop_txf) && !lod_is_zero)) { - address[count++] = lod; - } else if (instr->op == nir_texop_txf_ms && sample_index) { - address[count++] = sample_index; - } else if(instr->op == nir_texop_txs) { - count = 0; - if (lod) - address[count++] = lod; - else - address[count++] = ctx->ac.i32_0; - } + return ac_build_image_opcode(&ctx->ac, &args); +} - for (chan = 0; chan < count; chan++) { - address[chan] = LLVMBuildBitCast(ctx->ac.builder, - address[chan], ctx->ac.i32, ""); - } +static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, + const nir_intrinsic_instr *instr) +{ + LLVMValueRef res; + const nir_variable *var = instr->variables[0]->var; + const struct glsl_type *type = glsl_without_array(var->type); - if (instr->op == nir_texop_samples_identical) { - LLVMValueRef txf_address[4]; - struct ac_image_args txf_args = { 0 }; - unsigned txf_count = count; - memcpy(txf_address, address, sizeof(txf_address)); + if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) + return get_buffer_size(ctx, + get_sampler_desc(ctx, instr->variables[0], + AC_DESC_BUFFER, NULL, true, false), true); - if (!instr->is_array) - txf_address[2] = ctx->ac.i32_0; - txf_address[3] = ctx->ac.i32_0; + struct ac_image_args args = { 0 }; - set_tex_fetch_args(&ctx->ac, &txf_args, instr, nir_texop_txf, - fmask_ptr, NULL, - txf_address, txf_count, 0xf); + args.da = glsl_is_array_image(type); + args.dmask = 0xf; + args.resource = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false); + args.opcode = ac_image_get_resinfo; + args.addr = ctx->ac.i32_0; - result = build_tex_intrinsic(ctx, instr, false, &txf_args); + res = ac_build_image_opcode(&ctx->ac, &args); - result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); - result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0); - goto write_result; - } + LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false); - if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS && - instr->op != nir_texop_txs) { - unsigned sample_chan = instr->is_array ? 3 : 2; - address[sample_chan] = adjust_sample_index_using_fmask(&ctx->ac, - address[0], - address[1], - instr->is_array ? 
address[2] : NULL,
- address[sample_chan],
- fmask_ptr);
+ if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
+ glsl_sampler_type_is_array(type)) {
+ LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
+ LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
+ z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
+ res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, "");
 }
-
- if (offsets && instr->op == nir_texop_txf) {
- nir_const_value *const_offset =
- nir_src_as_const_value(instr->src[const_src].src);
- int num_offsets = instr->src[const_src].src.ssa->num_components;
- assert(const_offset);
- num_offsets = MIN2(num_offsets, instr->coord_components);
- if (num_offsets > 2)
- address[2] = LLVMBuildAdd(ctx->ac.builder,
- address[2], LLVMConstInt(ctx->ac.i32, const_offset->i32[2], false), "");
- if (num_offsets > 1)
- address[1] = LLVMBuildAdd(ctx->ac.builder,
- address[1], LLVMConstInt(ctx->ac.i32, const_offset->i32[1], false), "");
- address[0] = LLVMBuildAdd(ctx->ac.builder,
- address[0], LLVMConstInt(ctx->ac.i32, const_offset->i32[0], false), "");
+ if (ctx->ac.chip_class >= GFX9 &&
+ glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_1D &&
+ glsl_sampler_type_is_array(type)) {
+ LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
+ res = LLVMBuildInsertElement(ctx->ac.builder, res, layers,
+ ctx->ac.i32_1, "");
 }
+ return res;
+}

- /* TODO TG4 support */
- if (instr->op == nir_texop_tg4) {
- if (instr->is_shadow)
- dmask = 1;
- else
- dmask = 1 << instr->component;
+#define NOOP_WAITCNT 0xf7f
+#define LGKM_CNT 0x07f
+#define VM_CNT 0xf70
+
+static void emit_membar(struct ac_llvm_context *ac,
+ const nir_intrinsic_instr *instr)
+{
+ unsigned waitcnt = NOOP_WAITCNT;
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_memory_barrier:
+ case nir_intrinsic_group_memory_barrier:
+ waitcnt &= VM_CNT & LGKM_CNT;
+ break;
+ case nir_intrinsic_memory_barrier_atomic_counter:
+ case nir_intrinsic_memory_barrier_buffer:
+ case nir_intrinsic_memory_barrier_image:
+ waitcnt &= VM_CNT;
+ break;
+ case nir_intrinsic_memory_barrier_shared:
+ waitcnt &= LGKM_CNT;
+ break;
+ default:
+ break;
 }
- set_tex_fetch_args(&ctx->ac, &args, instr, instr->op,
- res_ptr, samp_ptr, address, count, dmask);
+ if (waitcnt != NOOP_WAITCNT)
+ ac_build_waitcnt(ac, waitcnt);
+}

- result = build_tex_intrinsic(ctx, instr, lod_is_zero, &args);
+void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
+{
+ /* SI only (thanks to a hw bug workaround):
+ * The real barrier instruction isn't needed, because an entire patch
+ * always fits into a single wave.
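+ * The s_waitcnt emitted below is the cheaper substitute: it waits
+ * for the wave's outstanding LDS (lgkmcnt) and memory (vmcnt)
+ * operations instead of performing a full execution barrier.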
+ */ + if (ac->chip_class == SI && stage == MESA_SHADER_TESS_CTRL) { + ac_build_waitcnt(ac, LGKM_CNT & VM_CNT); + return; + } + ac_build_intrinsic(ac, "llvm.amdgcn.s.barrier", + ac->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT); +} - if (instr->op == nir_texop_query_levels) - result = LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), ""); - else if (instr->is_shadow && instr->is_new_style_shadow && - instr->op != nir_texop_txs && instr->op != nir_texop_lod && - instr->op != nir_texop_tg4) - result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); - else if (instr->op == nir_texop_txs && - instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && - instr->is_array) { - LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false); - LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false); - LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, ""); - z = LLVMBuildSDiv(ctx->ac.builder, z, six, ""); - result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, ""); - } else if (ctx->ac.chip_class >= GFX9 && - instr->op == nir_texop_txs && - instr->sampler_dim == GLSL_SAMPLER_DIM_1D && - instr->is_array) { - LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false); - LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, ""); - result = LLVMBuildInsertElement(ctx->ac.builder, result, layers, - ctx->ac.i32_1, ""); - } else if (instr->dest.ssa.num_components != 4) - result = ac_trim_vector(&ctx->ac, result, instr->dest.ssa.num_components); +static void emit_discard(struct ac_nir_context *ctx, + const nir_intrinsic_instr *instr) +{ + LLVMValueRef cond; -write_result: - if (result) { - assert(instr->dest.is_ssa); - result = ac_to_integer(&ctx->ac, result); - _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result); + if (instr->intrinsic == nir_intrinsic_discard_if) { + cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, + get_src(ctx, instr->src[0]), + ctx->ac.i32_0, ""); + } else { + assert(instr->intrinsic == nir_intrinsic_discard); + cond = LLVMConstInt(ctx->ac.i1, false, 0); } + + ctx->abi->emit_kill(ctx->abi, cond); } +static LLVMValueRef +visit_load_helper_invocation(struct ac_nir_context *ctx) +{ + LLVMValueRef result = ac_build_intrinsic(&ctx->ac, + "llvm.amdgcn.ps.live", + ctx->ac.i1, NULL, 0, + AC_FUNC_ATTR_READNONE); + result = LLVMBuildNot(ctx->ac.builder, result, ""); + return LLVMBuildSExt(ctx->ac.builder, result, ctx->ac.i32, ""); +} -static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr) +static LLVMValueRef +visit_load_local_invocation_index(struct ac_nir_context *ctx) { - LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa); - LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, ""); + LLVMValueRef result; + LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac); + result = LLVMBuildAnd(ctx->ac.builder, ctx->abi->tg_size, + LLVMConstInt(ctx->ac.i32, 0xfc0, false), ""); - _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result); - _mesa_hash_table_insert(ctx->phis, instr, result); + return LLVMBuildAdd(ctx->ac.builder, result, thread_id, ""); } -static void visit_post_phi(struct ac_nir_context *ctx, - nir_phi_instr *instr, - LLVMValueRef llvm_phi) +static LLVMValueRef +visit_load_subgroup_id(struct ac_nir_context *ctx) { - nir_foreach_phi_src(src, instr) { - LLVMBasicBlockRef block = get_block(ctx, src->pred); - LLVMValueRef llvm_src = get_src(ctx, src->src); - - LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1); + if (ctx->stage == MESA_SHADER_COMPUTE) { + LLVMValueRef result; + 
result = LLVMBuildAnd(ctx->ac.builder, ctx->abi->tg_size, + LLVMConstInt(ctx->ac.i32, 0xfc0, false), ""); + return LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 6, false), ""); + } else { + return LLVMConstInt(ctx->ac.i32, 0, false); } } -static void phi_post_pass(struct ac_nir_context *ctx) +static LLVMValueRef +visit_load_num_subgroups(struct ac_nir_context *ctx) { - struct hash_entry *entry; - hash_table_foreach(ctx->phis, entry) { - visit_post_phi(ctx, (nir_phi_instr*)entry->key, - (LLVMValueRef)entry->data); + if (ctx->stage == MESA_SHADER_COMPUTE) { + return LLVMBuildAnd(ctx->ac.builder, ctx->abi->tg_size, + LLVMConstInt(ctx->ac.i32, 0x3f, false), ""); + } else { + return LLVMConstInt(ctx->ac.i32, 1, false); } } +static LLVMValueRef +visit_first_invocation(struct ac_nir_context *ctx) +{ + LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1); -static void visit_ssa_undef(struct ac_nir_context *ctx, - const nir_ssa_undef_instr *instr) + /* The second argument is whether cttz(0) should be defined, but we do not care. */ + LLVMValueRef args[] = {active_set, LLVMConstInt(ctx->ac.i1, 0, false)}; + LLVMValueRef result = ac_build_intrinsic(&ctx->ac, + "llvm.cttz.i64", + ctx->ac.i64, args, 2, + AC_FUNC_ATTR_NOUNWIND | + AC_FUNC_ATTR_READNONE); + + return LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i32, ""); +} + +static LLVMValueRef +visit_load_shared(struct ac_nir_context *ctx, + const nir_intrinsic_instr *instr) { - unsigned num_components = instr->def.num_components; - LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size); - LLVMValueRef undef; + LLVMValueRef values[4], derived_ptr, index, ret; - if (num_components == 1) - undef = LLVMGetUndef(type); - else { - undef = LLVMGetUndef(LLVMVectorType(type, num_components)); + LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0]); + + for (int chan = 0; chan < instr->num_components; chan++) { + index = LLVMConstInt(ctx->ac.i32, chan, 0); + derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, ""); + values[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, ""); } - _mesa_hash_table_insert(ctx->defs, &instr->def, undef); + + ret = ac_build_gather_values(&ctx->ac, values, instr->num_components); + return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), ""); } -static void visit_jump(struct ac_llvm_context *ctx, - const nir_jump_instr *instr) +static void +visit_store_shared(struct ac_nir_context *ctx, + const nir_intrinsic_instr *instr) { - switch (instr->type) { - case nir_jump_break: - ac_build_break(ctx); - break; - case nir_jump_continue: - ac_build_continue(ctx); - break; - default: - fprintf(stderr, "Unknown NIR jump instr: "); - nir_print_instr(&instr->instr, stderr); - fprintf(stderr, "\n"); - abort(); + LLVMValueRef derived_ptr, data,index; + LLVMBuilderRef builder = ctx->ac.builder; + + LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[1]); + LLVMValueRef src = get_src(ctx, instr->src[0]); + + int writemask = nir_intrinsic_write_mask(instr); + for (int chan = 0; chan < 4; chan++) { + if (!(writemask & (1 << chan))) { + continue; + } + data = ac_llvm_extract_elem(&ctx->ac, src, chan); + index = LLVMConstInt(ctx->ac.i32, chan, 0); + derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, ""); + LLVMBuildStore(builder, data, derived_ptr); } } -static void visit_cf_list(struct ac_nir_context *ctx, - struct exec_list *list); - -static void visit_block(struct ac_nir_context *ctx, nir_block *block) +static LLVMValueRef visit_var_atomic(struct 
ac_nir_context *ctx, + const nir_intrinsic_instr *instr, + LLVMValueRef ptr, int src_idx) { - LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->ac.builder); - nir_foreach_instr(instr, block) - { - switch (instr->type) { - case nir_instr_type_alu: - visit_alu(ctx, nir_instr_as_alu(instr)); + LLVMValueRef result; + LLVMValueRef src = get_src(ctx, instr->src[src_idx]); + + if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap || + instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap) { + LLVMValueRef src1 = get_src(ctx, instr->src[src_idx + 1]); + result = LLVMBuildAtomicCmpXchg(ctx->ac.builder, + ptr, src, src1, + LLVMAtomicOrderingSequentiallyConsistent, + LLVMAtomicOrderingSequentiallyConsistent, + false); + } else { + LLVMAtomicRMWBinOp op; + switch (instr->intrinsic) { + case nir_intrinsic_var_atomic_add: + case nir_intrinsic_shared_atomic_add: + op = LLVMAtomicRMWBinOpAdd; break; - case nir_instr_type_load_const: - visit_load_const(ctx, nir_instr_as_load_const(instr)); + case nir_intrinsic_var_atomic_umin: + case nir_intrinsic_shared_atomic_umin: + op = LLVMAtomicRMWBinOpUMin; + break; + case nir_intrinsic_var_atomic_umax: + case nir_intrinsic_shared_atomic_umax: + op = LLVMAtomicRMWBinOpUMax; + break; + case nir_intrinsic_var_atomic_imin: + case nir_intrinsic_shared_atomic_imin: + op = LLVMAtomicRMWBinOpMin; break; - case nir_instr_type_intrinsic: - visit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); + case nir_intrinsic_var_atomic_imax: + case nir_intrinsic_shared_atomic_imax: + op = LLVMAtomicRMWBinOpMax; break; - case nir_instr_type_tex: - visit_tex(ctx, nir_instr_as_tex(instr)); + case nir_intrinsic_var_atomic_and: + case nir_intrinsic_shared_atomic_and: + op = LLVMAtomicRMWBinOpAnd; break; - case nir_instr_type_phi: - visit_phi(ctx, nir_instr_as_phi(instr)); + case nir_intrinsic_var_atomic_or: + case nir_intrinsic_shared_atomic_or: + op = LLVMAtomicRMWBinOpOr; break; - case nir_instr_type_ssa_undef: - visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr)); + case nir_intrinsic_var_atomic_xor: + case nir_intrinsic_shared_atomic_xor: + op = LLVMAtomicRMWBinOpXor; break; - case nir_instr_type_jump: - visit_jump(&ctx->ac, nir_instr_as_jump(instr)); + case nir_intrinsic_var_atomic_exchange: + case nir_intrinsic_shared_atomic_exchange: + op = LLVMAtomicRMWBinOpXchg; break; default: - fprintf(stderr, "Unknown NIR instr type: "); - nir_print_instr(instr, stderr); - fprintf(stderr, "\n"); - abort(); + return NULL; } - } - _mesa_hash_table_insert(ctx->defs, block, llvm_block); + result = LLVMBuildAtomicRMW(ctx->ac.builder, op, ptr, ac_to_integer(&ctx->ac, src), + LLVMAtomicOrderingSequentiallyConsistent, + false); + } + return result; } -static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt) +static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx) { - LLVMValueRef value = get_src(ctx, if_stmt->condition); + LLVMValueRef values[2]; + LLVMValueRef pos[2]; - nir_block *then_block = - (nir_block *) exec_list_get_head(&if_stmt->then_list); + pos[0] = ac_to_float(&ctx->ac, ctx->abi->frag_pos[0]); + pos[1] = ac_to_float(&ctx->ac, ctx->abi->frag_pos[1]); - ac_build_uif(&ctx->ac, value, then_block->index); + values[0] = ac_build_fract(&ctx->ac, pos[0], 32); + values[1] = ac_build_fract(&ctx->ac, pos[1], 32); + return ac_build_gather_values(&ctx->ac, values, 2); +} - visit_cf_list(ctx, &if_stmt->then_list); +static LLVMValueRef visit_interp(struct ac_nir_context *ctx, + const nir_intrinsic_instr *instr) +{ + LLVMValueRef result[4]; + LLVMValueRef interp_param, 
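load_sample_pos above relies on the fact that, within a pixel, the sample location is simply the fractional part of the fragment coordinate. The same computation in scalar C (a sketch):

    #include <math.h>

    /* Sample position inside the pixel = fract(frag_pos.xy). */
    static void sample_pos(float frag_x, float frag_y, float out[2])
    {
            out[0] = frag_x - floorf(frag_x);
            out[1] = frag_y - floorf(frag_y);
    }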
attr_number;
+ unsigned location;
+ unsigned chan;
+ LLVMValueRef src_c0 = NULL;
+ LLVMValueRef src_c1 = NULL;
+ LLVMValueRef src0 = NULL;
+ int input_index = instr->variables[0]->var->data.location - VARYING_SLOT_VAR0;
+ switch (instr->intrinsic) {
+ case nir_intrinsic_interp_var_at_centroid:
+ location = INTERP_CENTROID;
+ break;
+ case nir_intrinsic_interp_var_at_sample:
+ case nir_intrinsic_interp_var_at_offset:
+ location = INTERP_CENTER;
+ src0 = get_src(ctx, instr->src[0]);
+ break;
+ default:
+ break;
+ }

- if (!exec_list_is_empty(&if_stmt->else_list)) {
- nir_block *else_block =
- (nir_block *) exec_list_get_head(&if_stmt->else_list);

+ if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) {
+ src_c0 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, src0, ctx->ac.i32_0, ""));
+ src_c1 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, src0, ctx->ac.i32_1, ""));
+ } else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) {
+ LLVMValueRef sample_position;
+ LLVMValueRef halfval = LLVMConstReal(ctx->ac.f32, 0.5f);

- ac_build_else(&ctx->ac, else_block->index);
- visit_cf_list(ctx, &if_stmt->else_list);

+ /* fetch the position of the given sample ID */
+ sample_position = ctx->abi->load_sample_position(ctx->abi, src0);
+
+ src_c0 = LLVMBuildExtractElement(ctx->ac.builder, sample_position, ctx->ac.i32_0, "");
+ src_c0 = LLVMBuildFSub(ctx->ac.builder, src_c0, halfval, "");
+ src_c1 = LLVMBuildExtractElement(ctx->ac.builder, sample_position, ctx->ac.i32_1, "");
+ src_c1 = LLVMBuildFSub(ctx->ac.builder, src_c1, halfval, "");
 }
+ interp_param = ctx->abi->lookup_interp_param(ctx->abi, instr->variables[0]->var->data.interpolation, location);
+ attr_number = LLVMConstInt(ctx->ac.i32, input_index, false);

- ac_build_endif(&ctx->ac, then_block->index);
-}

+ if (location == INTERP_CENTER) {
+ LLVMValueRef ij_out[2];
+ LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param);

-static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop)
-{
- nir_block *first_loop_block =
- (nir_block *) exec_list_get_head(&loop->body);

+ /*
+ * take the I then J parameters, and the DDX/Y for it, and
+ * calculate the IJ inputs for the interpolator. 
+ * temp1 = ddx * offset/sample.x + I; + * interp_param.I = ddy * offset/sample.y + temp1; + * temp1 = ddx * offset/sample.x + J; + * interp_param.J = ddy * offset/sample.y + temp1; + */ + for (unsigned i = 0; i < 2; i++) { + LLVMValueRef ix_ll = LLVMConstInt(ctx->ac.i32, i, false); + LLVMValueRef iy_ll = LLVMConstInt(ctx->ac.i32, i + 2, false); + LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder, + ddxy_out, ix_ll, ""); + LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder, + ddxy_out, iy_ll, ""); + LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder, + interp_param, ix_ll, ""); + LLVMValueRef temp1, temp2; - ac_build_bgnloop(&ctx->ac, first_loop_block->index); + interp_el = LLVMBuildBitCast(ctx->ac.builder, interp_el, + ctx->ac.f32, ""); - visit_cf_list(ctx, &loop->body); + temp1 = LLVMBuildFMul(ctx->ac.builder, ddx_el, src_c0, ""); + temp1 = LLVMBuildFAdd(ctx->ac.builder, temp1, interp_el, ""); - ac_build_endloop(&ctx->ac, first_loop_block->index); -} + temp2 = LLVMBuildFMul(ctx->ac.builder, ddy_el, src_c1, ""); + temp2 = LLVMBuildFAdd(ctx->ac.builder, temp2, temp1, ""); -static void visit_cf_list(struct ac_nir_context *ctx, - struct exec_list *list) -{ - foreach_list_typed(nir_cf_node, node, node, list) - { - switch (node->type) { - case nir_cf_node_block: - visit_block(ctx, nir_cf_node_as_block(node)); - break; + ij_out[i] = LLVMBuildBitCast(ctx->ac.builder, + temp2, ctx->ac.i32, ""); + } + interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2); - case nir_cf_node_if: - visit_if(ctx, nir_cf_node_as_if(node)); - break; + } - case nir_cf_node_loop: - visit_loop(ctx, nir_cf_node_as_loop(node)); - break; + for (chan = 0; chan < 4; chan++) { + LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); - default: - assert(0); + if (interp_param) { + interp_param = LLVMBuildBitCast(ctx->ac.builder, + interp_param, ctx->ac.v2f32, ""); + LLVMValueRef i = LLVMBuildExtractElement( + ctx->ac.builder, interp_param, ctx->ac.i32_0, ""); + LLVMValueRef j = LLVMBuildExtractElement( + ctx->ac.builder, interp_param, ctx->ac.i32_1, ""); + + result[chan] = ac_build_fs_interp(&ctx->ac, + llvm_chan, attr_number, + ctx->abi->prim_mask, i, j); + } else { + result[chan] = ac_build_fs_interp_mov(&ctx->ac, + LLVMConstInt(ctx->ac.i32, 2, false), + llvm_chan, attr_number, + ctx->abi->prim_mask); } } + return ac_build_varying_gather_values(&ctx->ac, result, instr->num_components, + instr->variables[0]->var->data.location_frac); } -static void -handle_vs_input_decl(struct radv_shader_context *ctx, - struct nir_variable *variable) +static void visit_intrinsic(struct ac_nir_context *ctx, + nir_intrinsic_instr *instr) { - LLVMValueRef t_list_ptr = ctx->vertex_buffers; - LLVMValueRef t_offset; - LLVMValueRef t_list; - LLVMValueRef input; - LLVMValueRef buffer_index; - int index = variable->data.location - VERT_ATTRIB_GENERIC0; - int idx = variable->data.location; - unsigned attrib_count = glsl_count_attribute_slots(variable->type, true); - uint8_t input_usage_mask = - ctx->shader_info->info.vs.input_usage_mask[variable->data.location]; - unsigned num_channels = util_last_bit(input_usage_mask); - - variable->data.driver_location = idx * 4; - - for (unsigned i = 0; i < attrib_count; ++i, ++idx) { - if (ctx->options->key.vs.instance_rate_inputs & (1u << (index + i))) { - buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.instance_id, - ctx->abi.start_instance, ""); - if (ctx->options->key.vs.as_ls) { - ctx->shader_info->vs.vgpr_comp_cnt = - MAX2(2, 
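The temp1/temp2 sequence above is a first-order extrapolation of the barycentric I/J values from the pixel center to the requested offset or sample position. One component of it in scalar C (a sketch, with hypothetical names):

    /* v' = v + dv/dx * dx + dv/dy * dy, exactly the temp1/temp2 steps above. */
    static float extrapolate_barycentric(float v, float dvdx, float dvdy,
                                         float dx, float dy)
    {
            float temp1 = dvdx * dx + v;
            return dvdy * dy + temp1;
    }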
ctx->shader_info->vs.vgpr_comp_cnt); - } else { - ctx->shader_info->vs.vgpr_comp_cnt = - MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt); - } + LLVMValueRef result = NULL; + + switch (instr->intrinsic) { + case nir_intrinsic_ballot: + result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0])); + break; + case nir_intrinsic_read_invocation: + case nir_intrinsic_read_first_invocation: { + LLVMValueRef args[2]; + + /* Value */ + args[0] = get_src(ctx, instr->src[0]); + + unsigned num_args; + const char *intr_name; + if (instr->intrinsic == nir_intrinsic_read_invocation) { + num_args = 2; + intr_name = "llvm.amdgcn.readlane"; + + /* Invocation */ + args[1] = get_src(ctx, instr->src[1]); + } else { + num_args = 1; + intr_name = "llvm.amdgcn.readfirstlane"; + } + + /* We currently have no other way to prevent LLVM from lifting the icmp + * calls to a dominating basic block. + */ + ac_build_optimization_barrier(&ctx->ac, &args[0]); + + result = ac_build_intrinsic(&ctx->ac, intr_name, + ctx->ac.i32, args, num_args, + AC_FUNC_ATTR_READNONE | + AC_FUNC_ATTR_CONVERGENT); + break; + } + case nir_intrinsic_load_subgroup_invocation: + result = ac_get_thread_id(&ctx->ac); + break; + case nir_intrinsic_load_work_group_id: { + LLVMValueRef values[3]; + + for (int i = 0; i < 3; i++) { + values[i] = ctx->abi->workgroup_ids[i] ? + ctx->abi->workgroup_ids[i] : ctx->ac.i32_0; + } + + result = ac_build_gather_values(&ctx->ac, values, 3); + break; + } + case nir_intrinsic_load_base_vertex: { + result = ctx->abi->load_base_vertex(ctx->abi); + break; + } + case nir_intrinsic_load_local_group_size: + result = ctx->abi->load_local_group_size(ctx->abi); + break; + case nir_intrinsic_load_vertex_id: + result = LLVMBuildAdd(ctx->ac.builder, ctx->abi->vertex_id, + ctx->abi->base_vertex, ""); + break; + case nir_intrinsic_load_vertex_id_zero_base: { + result = ctx->abi->vertex_id; + break; + } + case nir_intrinsic_load_local_invocation_id: { + result = ctx->abi->local_invocation_ids; + break; + } + case nir_intrinsic_load_base_instance: + result = ctx->abi->start_instance; + break; + case nir_intrinsic_load_draw_id: + result = ctx->abi->draw_id; + break; + case nir_intrinsic_load_view_index: + result = ctx->abi->view_index; + break; + case nir_intrinsic_load_invocation_id: + if (ctx->stage == MESA_SHADER_TESS_CTRL) + result = ac_unpack_param(&ctx->ac, ctx->abi->tcs_rel_ids, 8, 5); + else + result = ctx->abi->gs_invocation_id; + break; + case nir_intrinsic_load_primitive_id: + if (ctx->stage == MESA_SHADER_GEOMETRY) { + result = ctx->abi->gs_prim_id; + } else if (ctx->stage == MESA_SHADER_TESS_CTRL) { + result = ctx->abi->tcs_patch_id; + } else if (ctx->stage == MESA_SHADER_TESS_EVAL) { + result = ctx->abi->tes_patch_id; } else - buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id, - ctx->abi.base_vertex, ""); - t_offset = LLVMConstInt(ctx->ac.i32, index + i, false); - - t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset); - - input = ac_build_buffer_load_format(&ctx->ac, t_list, - buffer_index, - ctx->ac.i32_0, - num_channels, false, true); - - input = ac_build_expand_to_vec4(&ctx->ac, input, num_channels); - - for (unsigned chan = 0; chan < 4; chan++) { - LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); - ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] = - ac_to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, - input, llvm_chan, "")); - } + fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage); + break; + case nir_intrinsic_load_sample_id: + result = 
ac_unpack_param(&ctx->ac, ctx->abi->ancillary, 8, 4); + break; + case nir_intrinsic_load_sample_pos: + result = load_sample_pos(ctx); + break; + case nir_intrinsic_load_sample_mask_in: + result = ctx->abi->load_sample_mask_in(ctx->abi); + break; + case nir_intrinsic_load_frag_coord: { + LLVMValueRef values[4] = { + ctx->abi->frag_pos[0], + ctx->abi->frag_pos[1], + ctx->abi->frag_pos[2], + ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ctx->abi->frag_pos[3]) + }; + result = ac_build_gather_values(&ctx->ac, values, 4); + break; } -} - -static void interp_fs_input(struct radv_shader_context *ctx, - unsigned attr, - LLVMValueRef interp_param, - LLVMValueRef prim_mask, - LLVMValueRef result[4]) -{ - LLVMValueRef attr_number; - unsigned chan; - LLVMValueRef i, j; - bool interp = interp_param != NULL; - - attr_number = LLVMConstInt(ctx->ac.i32, attr, false); - - /* fs.constant returns the param from the middle vertex, so it's not - * really useful for flat shading. It's meant to be used for custom - * interpolation (but the intrinsic can't fetch from the other two - * vertices). - * - * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state - * to do the right thing. The only reason we use fs.constant is that - * fs.interp cannot be used on integers, because they can be equal - * to NaN. - */ - if (interp) { - interp_param = LLVMBuildBitCast(ctx->ac.builder, interp_param, - ctx->ac.v2f32, ""); + case nir_intrinsic_load_front_face: + result = ctx->abi->front_face; + break; + case nir_intrinsic_load_helper_invocation: + result = visit_load_helper_invocation(ctx); + break; + case nir_intrinsic_load_instance_id: + result = ctx->abi->instance_id; + break; + case nir_intrinsic_load_num_work_groups: + result = ctx->abi->num_work_groups; + break; + case nir_intrinsic_load_local_invocation_index: + result = visit_load_local_invocation_index(ctx); + break; + case nir_intrinsic_load_subgroup_id: + result = visit_load_subgroup_id(ctx); + break; + case nir_intrinsic_load_num_subgroups: + result = visit_load_num_subgroups(ctx); + break; + case nir_intrinsic_first_invocation: + result = visit_first_invocation(ctx); + break; + case nir_intrinsic_load_push_constant: + result = visit_load_push_constant(ctx, instr); + break; + case nir_intrinsic_vulkan_resource_index: { + LLVMValueRef index = get_src(ctx, instr->src[0]); + unsigned desc_set = nir_intrinsic_desc_set(instr); + unsigned binding = nir_intrinsic_binding(instr); - i = LLVMBuildExtractElement(ctx->ac.builder, interp_param, - ctx->ac.i32_0, ""); - j = LLVMBuildExtractElement(ctx->ac.builder, interp_param, - ctx->ac.i32_1, ""); + result = ctx->abi->load_resource(ctx->abi, index, desc_set, + binding); + break; } - - for (chan = 0; chan < 4; chan++) { - LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); - - if (interp) { - result[chan] = ac_build_fs_interp(&ctx->ac, - llvm_chan, - attr_number, - prim_mask, i, j); - } else { - result[chan] = ac_build_fs_interp_mov(&ctx->ac, - LLVMConstInt(ctx->ac.i32, 2, false), - llvm_chan, - attr_number, - prim_mask); - } + case nir_intrinsic_vulkan_resource_reindex: + result = visit_vulkan_resource_reindex(ctx, instr); + break; + case nir_intrinsic_store_ssbo: + visit_store_ssbo(ctx, instr); + break; + case nir_intrinsic_load_ssbo: + result = visit_load_buffer(ctx, instr); + break; + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_umax: + case 
nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_ssbo_atomic_comp_swap: + result = visit_atomic_ssbo(ctx, instr); + break; + case nir_intrinsic_load_ubo: + result = visit_load_ubo_buffer(ctx, instr); + break; + case nir_intrinsic_get_buffer_size: + result = visit_get_buffer_size(ctx, instr); + break; + case nir_intrinsic_load_var: + result = visit_load_var(ctx, instr); + break; + case nir_intrinsic_store_var: + visit_store_var(ctx, instr); + break; + case nir_intrinsic_load_shared: + result = visit_load_shared(ctx, instr); + break; + case nir_intrinsic_store_shared: + visit_store_shared(ctx, instr); + break; + case nir_intrinsic_image_samples: + result = visit_image_samples(ctx, instr); + break; + case nir_intrinsic_image_load: + result = visit_image_load(ctx, instr); + break; + case nir_intrinsic_image_store: + visit_image_store(ctx, instr); + break; + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_min: + case nir_intrinsic_image_atomic_max: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: + result = visit_image_atomic(ctx, instr); + break; + case nir_intrinsic_image_size: + result = visit_image_size(ctx, instr); + break; + case nir_intrinsic_shader_clock: + result = ac_build_shader_clock(&ctx->ac); + break; + case nir_intrinsic_discard: + case nir_intrinsic_discard_if: + emit_discard(ctx, instr); + break; + case nir_intrinsic_memory_barrier: + case nir_intrinsic_group_memory_barrier: + case nir_intrinsic_memory_barrier_atomic_counter: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier_image: + case nir_intrinsic_memory_barrier_shared: + emit_membar(&ctx->ac, instr); + break; + case nir_intrinsic_barrier: + ac_emit_barrier(&ctx->ac, ctx->stage); + break; + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_shared_atomic_comp_swap: { + LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0]); + result = visit_var_atomic(ctx, instr, ptr, 1); + break; } -} - -static void -handle_fs_input_decl(struct radv_shader_context *ctx, - struct nir_variable *variable) -{ - int idx = variable->data.location; - unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); - LLVMValueRef interp; - - variable->data.driver_location = idx * 4; - ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location; - - if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) { - unsigned interp_type; - if (variable->data.sample) - interp_type = INTERP_SAMPLE; - else if (variable->data.centroid) - interp_type = INTERP_CENTROID; - else - interp_type = INTERP_CENTER; - - interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type); - } else - interp = NULL; - - for (unsigned i = 0; i < attrib_count; ++i) - ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp; - -} - -static void -handle_vs_inputs(struct radv_shader_context *ctx, - struct nir_shader *nir) { - nir_foreach_variable(variable, 
&nir->inputs) - handle_vs_input_decl(ctx, variable); -} - -static void -prepare_interp_optimize(struct radv_shader_context *ctx, - struct nir_shader *nir) -{ - if (!ctx->options->key.fs.multisample) - return; - - bool uses_center = false; - bool uses_centroid = false; - nir_foreach_variable(variable, &nir->inputs) { - if (glsl_get_base_type(glsl_without_array(variable->type)) != GLSL_TYPE_FLOAT || - variable->data.sample) - continue; - - if (variable->data.centroid) - uses_centroid = true; - else - uses_center = true; + case nir_intrinsic_var_atomic_add: + case nir_intrinsic_var_atomic_imin: + case nir_intrinsic_var_atomic_umin: + case nir_intrinsic_var_atomic_imax: + case nir_intrinsic_var_atomic_umax: + case nir_intrinsic_var_atomic_and: + case nir_intrinsic_var_atomic_or: + case nir_intrinsic_var_atomic_xor: + case nir_intrinsic_var_atomic_exchange: + case nir_intrinsic_var_atomic_comp_swap: { + LLVMValueRef ptr = build_gep_for_deref(ctx, instr->variables[0]); + result = visit_var_atomic(ctx, instr, ptr, 0); + break; + } + case nir_intrinsic_interp_var_at_centroid: + case nir_intrinsic_interp_var_at_sample: + case nir_intrinsic_interp_var_at_offset: + result = visit_interp(ctx, instr); + break; + case nir_intrinsic_emit_vertex: + ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs); + break; + case nir_intrinsic_end_primitive: + ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr)); + break; + case nir_intrinsic_load_tess_coord: + result = ctx->abi->load_tess_coord(ctx->abi); + break; + case nir_intrinsic_load_tess_level_outer: + result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER); + break; + case nir_intrinsic_load_tess_level_inner: + result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER); + break; + case nir_intrinsic_load_patch_vertices_in: + result = ctx->abi->load_patch_vertices_in(ctx->abi); + break; + case nir_intrinsic_vote_all: { + LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0])); + result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, ""); + break; + } + case nir_intrinsic_vote_any: { + LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0])); + result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, ""); + break; } - - if (uses_center && uses_centroid) { - LLVMValueRef sel = LLVMBuildICmp(ctx->ac.builder, LLVMIntSLT, ctx->abi.prim_mask, ctx->ac.i32_0, ""); - ctx->persp_centroid = LLVMBuildSelect(ctx->ac.builder, sel, ctx->persp_center, ctx->persp_centroid, ""); - ctx->linear_centroid = LLVMBuildSelect(ctx->ac.builder, sel, ctx->linear_center, ctx->linear_centroid, ""); + default: + fprintf(stderr, "Unknown intrinsic: "); + nir_print_instr(&instr->instr, stderr); + fprintf(stderr, "\n"); + break; } -} - -static void -handle_fs_inputs(struct radv_shader_context *ctx, - struct nir_shader *nir) -{ - prepare_interp_optimize(ctx, nir); - - nir_foreach_variable(variable, &nir->inputs) - handle_fs_input_decl(ctx, variable); - - unsigned index = 0; - - if (ctx->shader_info->info.ps.uses_input_attachments || - ctx->shader_info->info.needs_multiview_view_index) - ctx->input_mask |= 1ull << VARYING_SLOT_LAYER; - - for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) { - LLVMValueRef interp_param; - LLVMValueRef *inputs = ctx->inputs +radeon_llvm_reg_index_soa(i, 0); - - if (!(ctx->input_mask & (1ull << i))) - continue; - - if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC || - i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) { - 
interp_param = *inputs;
- interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask,
- inputs);
-
- if (!interp_param)
- ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
- ++index;
- } else if (i == VARYING_SLOT_POS) {
- for(int i = 0; i < 3; ++i)
- inputs[i] = ctx->abi.frag_pos[i];
-
- inputs[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
- ctx->abi.frag_pos[3]);
- }
 }
- ctx->shader_info->fs.num_interp = index;
- ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0;
-
- if (ctx->shader_info->info.needs_multiview_view_index)
- ctx->abi.view_index = ctx->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
 }

-static void
-scan_shader_output_decl(struct radv_shader_context *ctx,
- struct nir_variable *variable,
- struct nir_shader *shader,
- gl_shader_stage stage)
+static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
+ const nir_deref_var *deref,
+ enum ac_descriptor_type desc_type,
+ const nir_tex_instr *tex_instr,
+ bool image, bool write)
 {
- int idx = variable->data.location + variable->data.index;
- unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
- uint64_t mask_attribs;
-
- variable->data.driver_location = idx * 4;
-
- /* tess ctrl has its own load/store paths for outputs */
- if (stage == MESA_SHADER_TESS_CTRL)
- return;
+ LLVMValueRef index = NULL;
+ unsigned constant_index = 0;
+ unsigned descriptor_set;
+ unsigned base_index;

- mask_attribs = ((1ull << attrib_count) - 1) << idx;
- if (stage == MESA_SHADER_VERTEX ||
- stage == MESA_SHADER_TESS_EVAL ||
- stage == MESA_SHADER_GEOMETRY) {
- if (idx == VARYING_SLOT_CLIP_DIST0) {
- int length = shader->info.clip_distance_array_size +
- shader->info.cull_distance_array_size;
- if (stage == MESA_SHADER_VERTEX) {
- ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1;
- ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1;
- }
- if (stage == MESA_SHADER_TESS_EVAL) {
- ctx->shader_info->tes.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1;
- ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1;
- }
+ if (!deref) {
+ assert(tex_instr && !image);
+ descriptor_set = 0;
+ base_index = tex_instr->sampler_index;
+ } else {
+ const nir_deref *tail = &deref->deref;
+ while (tail->child) {
+ const nir_deref_array *child = nir_deref_as_array(tail->child);
+ unsigned array_size = glsl_get_aoa_size(tail->child->type);

- if (length > 4)
- attrib_count = 2;
- else
- attrib_count = 1;
- mask_attribs = 1ull << idx;
- }
- }
+ if (!array_size)
+ array_size = 1;

- ctx->output_mask |= mask_attribs;
-}
+ assert(child->deref_array_type != nir_deref_array_type_wildcard);

-void
-ac_handle_shader_output_decl(struct ac_llvm_context *ctx,
- struct ac_shader_abi *abi,
- struct nir_shader *nir,
- struct nir_variable *variable,
- gl_shader_stage stage)
-{
- unsigned output_loc = variable->data.driver_location / 4;
- unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
+ if (child->deref_array_type == nir_deref_array_type_indirect) {
+ LLVMValueRef indirect = get_src(ctx, child->indirect);

- /* tess ctrl has its own load/store paths for outputs */
- if (stage == MESA_SHADER_TESS_CTRL)
- return;
+ indirect = LLVMBuildMul(ctx->ac.builder, indirect,
+ LLVMConstInt(ctx->ac.i32, array_size, false), "");

- if (stage == MESA_SHADER_VERTEX ||
- stage == 
MESA_SHADER_TESS_EVAL || - stage == MESA_SHADER_GEOMETRY) { - int idx = variable->data.location + variable->data.index; - if (idx == VARYING_SLOT_CLIP_DIST0) { - int length = nir->info.clip_distance_array_size + - nir->info.cull_distance_array_size; + if (!index) + index = indirect; + else + index = LLVMBuildAdd(ctx->ac.builder, index, indirect, ""); + } - if (length > 4) - attrib_count = 2; - else - attrib_count = 1; - } - } + constant_index += child->base_offset * array_size; - for (unsigned i = 0; i < attrib_count; ++i) { - for (unsigned chan = 0; chan < 4; chan++) { - abi->outputs[radeon_llvm_reg_index_soa(output_loc + i, chan)] = - ac_build_alloca_undef(ctx, ctx->f32, ""); + tail = &child->deref; } + descriptor_set = deref->var->data.descriptor_set; + base_index = deref->var->data.binding; } -} -static LLVMTypeRef -glsl_base_to_llvm_type(struct ac_llvm_context *ac, - enum glsl_base_type type) -{ - switch (type) { - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_SUBROUTINE: - return ac->i32; - case GLSL_TYPE_FLOAT: /* TODO handle mediump */ - return ac->f32; - case GLSL_TYPE_INT64: - case GLSL_TYPE_UINT64: - return ac->i64; - case GLSL_TYPE_DOUBLE: - return ac->f64; - default: - unreachable("unknown GLSL type"); - } + return ctx->abi->load_sampler_desc(ctx->abi, + descriptor_set, + base_index, + constant_index, index, + desc_type, image, write); } -static LLVMTypeRef -glsl_to_llvm_type(struct ac_llvm_context *ac, - const struct glsl_type *type) +static void set_tex_fetch_args(struct ac_llvm_context *ctx, + struct ac_image_args *args, + const nir_tex_instr *instr, + nir_texop op, + LLVMValueRef res_ptr, LLVMValueRef samp_ptr, + LLVMValueRef *param, unsigned count, + unsigned dmask) { - if (glsl_type_is_scalar(type)) { - return glsl_base_to_llvm_type(ac, glsl_get_base_type(type)); - } - - if (glsl_type_is_vector(type)) { - return LLVMVectorType( - glsl_base_to_llvm_type(ac, glsl_get_base_type(type)), - glsl_get_vector_elements(type)); - } - - if (glsl_type_is_matrix(type)) { - return LLVMArrayType( - glsl_to_llvm_type(ac, glsl_get_column_type(type)), - glsl_get_matrix_columns(type)); - } + unsigned is_rect = 0; + bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE; - if (glsl_type_is_array(type)) { - return LLVMArrayType( - glsl_to_llvm_type(ac, glsl_get_array_element(type)), - glsl_get_length(type)); - } + if (op == nir_texop_lod) + da = false; + /* Pad to power of two vector */ + while (count < util_next_power_of_two(count)) + param[count++] = LLVMGetUndef(ctx->i32); - assert(glsl_type_is_struct(type)); + if (count > 1) + args->addr = ac_build_gather_values(ctx, param, count); + else + args->addr = param[0]; - LLVMTypeRef member_types[glsl_get_length(type)]; + args->resource = res_ptr; + args->sampler = samp_ptr; - for (unsigned i = 0; i < glsl_get_length(type); i++) { - member_types[i] = - glsl_to_llvm_type(ac, - glsl_get_struct_field(type, i)); + if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) { + args->addr = param[0]; + return; } - return LLVMStructTypeInContext(ac->context, member_types, - glsl_get_length(type), false); + args->dmask = dmask; + args->unorm = is_rect; + args->da = da; } -static void -setup_locals(struct ac_nir_context *ctx, - struct nir_function *func) +/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL. + * + * SI-CI: + * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic + * filtering manually. 
The driver sets img7 to a mask clearing + * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do: + * s_and_b32 samp0, samp0, img7 + * + * VI: + * The ANISO_OVERRIDE sampler field enables this fix in TA. + */ +static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx, + LLVMValueRef res, LLVMValueRef samp) { - int i, j; - ctx->num_locals = 0; - nir_foreach_variable(variable, &func->impl->locals) { - unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); - variable->data.driver_location = ctx->num_locals * 4; - variable->data.location_frac = 0; - ctx->num_locals += attrib_count; - } - ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef)); - if (!ctx->locals) - return; + LLVMBuilderRef builder = ctx->ac.builder; + LLVMValueRef img7, samp0; - for (i = 0; i < ctx->num_locals; i++) { - for (j = 0; j < 4; j++) { - ctx->locals[i * 4 + j] = - ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, "temp"); - } - } + if (ctx->ac.chip_class >= VI) + return samp; + + img7 = LLVMBuildExtractElement(builder, res, + LLVMConstInt(ctx->ac.i32, 7, 0), ""); + samp0 = LLVMBuildExtractElement(builder, samp, + LLVMConstInt(ctx->ac.i32, 0, 0), ""); + samp0 = LLVMBuildAnd(builder, samp0, img7, ""); + return LLVMBuildInsertElement(builder, samp, samp0, + LLVMConstInt(ctx->ac.i32, 0, 0), ""); } -static void -setup_shared(struct ac_nir_context *ctx, - struct nir_shader *nir) +static void tex_fetch_ptrs(struct ac_nir_context *ctx, + nir_tex_instr *instr, + LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, + LLVMValueRef *fmask_ptr) { - nir_foreach_variable(variable, &nir->shared) { - LLVMValueRef shared = - LLVMAddGlobalInAddressSpace( - ctx->ac.module, glsl_to_llvm_type(&ctx->ac, variable->type), - variable->name ? variable->name : "", - AC_LOCAL_ADDR_SPACE); - _mesa_hash_table_insert(ctx->vars, variable, shared); + if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) + *res_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_BUFFER, instr, false, false); + else + *res_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_IMAGE, instr, false, false); + if (samp_ptr) { + if (instr->sampler) + *samp_ptr = get_sampler_desc(ctx, instr->sampler, AC_DESC_SAMPLER, instr, false, false); + else + *samp_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_SAMPLER, instr, false, false); + if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT) + *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr); } + if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms || + instr->op == nir_texop_samples_identical)) + *fmask_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_FMASK, instr, false, false); } -/* Initialize arguments for the shader export intrinsic */ -static void -si_llvm_init_export_args(struct radv_shader_context *ctx, - LLVMValueRef *values, - unsigned enabled_channels, - unsigned target, - struct ac_export_args *args) +static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx, + LLVMValueRef coord) { - /* Specify the channels that are enabled. 
*/ - args->enabled_channels = enabled_channels; - - /* Specify whether the EXEC mask represents the valid mask */ - args->valid_mask = 0; - - /* Specify whether this is the last export */ - args->done = 0; - - /* Specify the target we are exporting */ - args->target = target; - - args->compr = false; - args->out[0] = LLVMGetUndef(ctx->ac.f32); - args->out[1] = LLVMGetUndef(ctx->ac.f32); - args->out[2] = LLVMGetUndef(ctx->ac.f32); - args->out[3] = LLVMGetUndef(ctx->ac.f32); - - if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) { - unsigned index = target - V_008DFC_SQ_EXP_MRT; - unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf; - bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1; - bool is_int10 = (ctx->options->key.fs.is_int10 >> index) & 1; - unsigned chan; - - LLVMValueRef (*packf)(struct ac_llvm_context *ctx, LLVMValueRef args[2]) = NULL; - LLVMValueRef (*packi)(struct ac_llvm_context *ctx, LLVMValueRef args[2], - unsigned bits, bool hi) = NULL; - - switch(col_format) { - case V_028714_SPI_SHADER_ZERO: - args->enabled_channels = 0; /* writemask */ - args->target = V_008DFC_SQ_EXP_NULL; - break; + coord = ac_to_float(ctx, coord); + coord = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &coord, 1, 0); + coord = ac_to_integer(ctx, coord); + return coord; +} - case V_028714_SPI_SHADER_32_R: - args->enabled_channels = 1; - args->out[0] = values[0]; - break; +static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) +{ + LLVMValueRef result = NULL; + struct ac_image_args args = { 0 }; + unsigned dmask = 0xf; + LLVMValueRef address[16]; + LLVMValueRef coords[5]; + LLVMValueRef coord = NULL, lod = NULL, comparator = NULL; + LLVMValueRef bias = NULL, offsets = NULL; + LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL; + LLVMValueRef ddx = NULL, ddy = NULL; + LLVMValueRef derivs[6]; + unsigned chan, count = 0; + unsigned const_src = 0, num_deriv_comp = 0; + bool lod_is_zero = false; - case V_028714_SPI_SHADER_32_GR: - args->enabled_channels = 0x3; - args->out[0] = values[0]; - args->out[1] = values[1]; - break; + tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr); - case V_028714_SPI_SHADER_32_AR: - args->enabled_channels = 0x9; - args->out[0] = values[0]; - args->out[3] = values[3]; + for (unsigned i = 0; i < instr->num_srcs; i++) { + switch (instr->src[i].src_type) { + case nir_tex_src_coord: + coord = get_src(ctx, instr->src[i].src); break; - - case V_028714_SPI_SHADER_FP16_ABGR: - args->enabled_channels = 0x5; - packf = ac_build_cvt_pkrtz_f16; + case nir_tex_src_projector: break; - - case V_028714_SPI_SHADER_UNORM16_ABGR: - args->enabled_channels = 0x5; - packf = ac_build_cvt_pknorm_u16; + case nir_tex_src_comparator: + comparator = get_src(ctx, instr->src[i].src); break; - - case V_028714_SPI_SHADER_SNORM16_ABGR: - args->enabled_channels = 0x5; - packf = ac_build_cvt_pknorm_i16; + case nir_tex_src_offset: + offsets = get_src(ctx, instr->src[i].src); + const_src = i; break; - - case V_028714_SPI_SHADER_UINT16_ABGR: - args->enabled_channels = 0x5; - packi = ac_build_cvt_pk_u16; + case nir_tex_src_bias: + bias = get_src(ctx, instr->src[i].src); break; + case nir_tex_src_lod: { + nir_const_value *val = nir_src_as_const_value(instr->src[i].src); - case V_028714_SPI_SHADER_SINT16_ABGR: - args->enabled_channels = 0x5; - packi = ac_build_cvt_pk_i16; + if (val && val->i32[0] == 0) + lod_is_zero = true; + lod = get_src(ctx, instr->src[i].src); break; - + } + case nir_tex_src_ms_index: + 
sample_index = get_src(ctx, instr->src[i].src); + break; + case nir_tex_src_ms_mcs: + break; + case nir_tex_src_ddx: + ddx = get_src(ctx, instr->src[i].src); + num_deriv_comp = instr->src[i].src.ssa->num_components; + break; + case nir_tex_src_ddy: + ddy = get_src(ctx, instr->src[i].src); + break; + case nir_tex_src_texture_offset: + case nir_tex_src_sampler_offset: + case nir_tex_src_plane: default: - case V_028714_SPI_SHADER_32_ABGR: - memcpy(&args->out[0], values, sizeof(values[0]) * 4); break; } - - /* Pack f16 or norm_i16/u16. */ - if (packf) { - for (chan = 0; chan < 2; chan++) { - LLVMValueRef pack_args[2] = { - values[2 * chan], - values[2 * chan + 1] - }; - LLVMValueRef packed; - - packed = packf(&ctx->ac, pack_args); - args->out[chan] = ac_to_float(&ctx->ac, packed); - } - args->compr = 1; /* COMPR flag */ - } - - /* Pack i16/u16. */ - if (packi) { - for (chan = 0; chan < 2; chan++) { - LLVMValueRef pack_args[2] = { - ac_to_integer(&ctx->ac, values[2 * chan]), - ac_to_integer(&ctx->ac, values[2 * chan + 1]) - }; - LLVMValueRef packed; - - packed = packi(&ctx->ac, pack_args, - is_int8 ? 8 : is_int10 ? 10 : 16, - chan == 1); - args->out[chan] = ac_to_float(&ctx->ac, packed); - } - args->compr = 1; /* COMPR flag */ - } - return; } - memcpy(&args->out[0], values, sizeof(values[0]) * 4); - - for (unsigned i = 0; i < 4; ++i) { - if (!(args->enabled_channels & (1 << i))) - continue; - - args->out[i] = ac_to_float(&ctx->ac, args->out[i]); + if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { + result = get_buffer_size(ctx, res_ptr, true); + goto write_result; } -} - -static void -radv_export_param(struct radv_shader_context *ctx, unsigned index, - LLVMValueRef *values, unsigned enabled_channels) -{ - struct ac_export_args args; - si_llvm_init_export_args(ctx, values, enabled_channels, - V_008DFC_SQ_EXP_PARAM + index, &args); - ac_build_export(&ctx->ac, &args); -} + if (instr->op == nir_texop_texture_samples) { + LLVMValueRef res, samples, is_msaa; + res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->ac.v8i32, ""); + samples = LLVMBuildExtractElement(ctx->ac.builder, res, + LLVMConstInt(ctx->ac.i32, 3, false), ""); + is_msaa = LLVMBuildLShr(ctx->ac.builder, samples, + LLVMConstInt(ctx->ac.i32, 28, false), ""); + is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa, + LLVMConstInt(ctx->ac.i32, 0xe, false), ""); + is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa, + LLVMConstInt(ctx->ac.i32, 0xe, false), ""); -static LLVMValueRef -radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan) -{ - LLVMValueRef output = - ctx->abi.outputs[radeon_llvm_reg_index_soa(index, chan)]; + samples = LLVMBuildLShr(ctx->ac.builder, samples, + LLVMConstInt(ctx->ac.i32, 16, false), ""); + samples = LLVMBuildAnd(ctx->ac.builder, samples, + LLVMConstInt(ctx->ac.i32, 0xf, false), ""); + samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1, + samples, ""); + samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples, + ctx->ac.i32_1, ""); + result = samples; + goto write_result; + } - return LLVMBuildLoad(ctx->ac.builder, output, ""); -} + if (coord) + for (chan = 0; chan < instr->coord_components; chan++) + coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan); -static void -handle_vs_outputs_post(struct radv_shader_context *ctx, - bool export_prim_id, - struct ac_vs_output_info *outinfo) -{ - uint32_t param_count = 0; - unsigned target; - unsigned pos_idx, num_pos_exports = 0; - struct ac_export_args args, pos_args[4] = {}; - LLVMValueRef 
psize_value = NULL, layer_value = NULL, viewport_index_value = NULL; - int i; + if (offsets && instr->op != nir_texop_txf) { + LLVMValueRef offset[3], pack; + for (chan = 0; chan < 3; ++chan) + offset[chan] = ctx->ac.i32_0; - if (ctx->options->key.has_multiview_view_index) { - LLVMValueRef* tmp_out = &ctx->abi.outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]; - if(!*tmp_out) { - for(unsigned i = 0; i < 4; ++i) - ctx->abi.outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, i)] = - ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, ""); + args.offset = true; + for (chan = 0; chan < ac_get_llvm_num_components(offsets); chan++) { + offset[chan] = ac_llvm_extract_elem(&ctx->ac, offsets, chan); + offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan], + LLVMConstInt(ctx->ac.i32, 0x3f, false), ""); + if (chan) + offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan], + LLVMConstInt(ctx->ac.i32, chan * 8, false), ""); } + pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], ""); + pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], ""); + address[count++] = pack; - LLVMBuildStore(ctx->ac.builder, ac_to_float(&ctx->ac, ctx->abi.view_index), *tmp_out); - ctx->output_mask |= 1ull << VARYING_SLOT_LAYER; + } + /* pack LOD bias value */ + if (instr->op == nir_texop_txb && bias) { + address[count++] = bias; } - memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED, - sizeof(outinfo->vs_output_param_offset)); - - if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) { - LLVMValueRef slots[8]; - unsigned j; - - if (outinfo->cull_dist_mask) - outinfo->cull_dist_mask <<= ctx->num_output_clips; + /* Pack depth comparison value */ + if (instr->is_shadow && comparator) { + LLVMValueRef z = ac_to_float(&ctx->ac, + ac_llvm_extract_elem(&ctx->ac, comparator, 0)); - i = VARYING_SLOT_CLIP_DIST0; - for (j = 0; j < ctx->num_output_clips + ctx->num_output_culls; j++) - slots[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j)); + /* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT, + * so the depth comparison value isn't clamped for Z16 and + * Z24 anymore. Do it manually here. + * + * It's unnecessary if the original texture format was + * Z32_FLOAT, but we don't know that here. 
+ */ + if (ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference) + z = ac_build_clamp(&ctx->ac, z); - for (i = ctx->num_output_clips + ctx->num_output_culls; i < 8; i++) - slots[i] = LLVMGetUndef(ctx->ac.f32); + address[count++] = z; + } - if (ctx->num_output_clips + ctx->num_output_culls > 4) { - target = V_008DFC_SQ_EXP_POS + 3; - si_llvm_init_export_args(ctx, &slots[4], 0xf, target, &args); - memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], - &args, sizeof(args)); + /* pack derivatives */ + if (ddx || ddy) { + int num_src_deriv_channels, num_dest_deriv_channels; + switch (instr->sampler_dim) { + case GLSL_SAMPLER_DIM_3D: + case GLSL_SAMPLER_DIM_CUBE: + num_deriv_comp = 3; + num_src_deriv_channels = 3; + num_dest_deriv_channels = 3; + break; + case GLSL_SAMPLER_DIM_2D: + default: + num_src_deriv_channels = 2; + num_dest_deriv_channels = 2; + num_deriv_comp = 2; + break; + case GLSL_SAMPLER_DIM_1D: + num_src_deriv_channels = 1; + if (ctx->ac.chip_class >= GFX9) { + num_dest_deriv_channels = 2; + num_deriv_comp = 2; + } else { + num_dest_deriv_channels = 1; + num_deriv_comp = 1; + } + break; } - target = V_008DFC_SQ_EXP_POS + 2; - si_llvm_init_export_args(ctx, &slots[0], 0xf, target, &args); - memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], - &args, sizeof(args)); - + for (unsigned i = 0; i < num_src_deriv_channels; i++) { + derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i)); + derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i)); + } + for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) { + derivs[i] = ctx->ac.f32_0; + derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0; + } } - LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_1}; - if (ctx->output_mask & (1ull << VARYING_SLOT_POS)) { - for (unsigned j = 0; j < 4; j++) - pos_values[j] = radv_load_output(ctx, VARYING_SLOT_POS, j); + if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) { + for (chan = 0; chan < instr->coord_components; chan++) + coords[chan] = ac_to_float(&ctx->ac, coords[chan]); + if (instr->coord_components == 3) + coords[3] = LLVMGetUndef(ctx->ac.f32); + ac_prepare_cube_coords(&ctx->ac, + instr->op == nir_texop_txd, instr->is_array, + instr->op == nir_texop_lod, coords, derivs); + if (num_deriv_comp) + num_deriv_comp--; } - si_llvm_init_export_args(ctx, pos_values, 0xf, V_008DFC_SQ_EXP_POS, &pos_args[0]); - if (ctx->output_mask & (1ull << VARYING_SLOT_PSIZ)) { - outinfo->writes_pointsize = true; - psize_value = radv_load_output(ctx, VARYING_SLOT_PSIZ, 0); + if (ddx || ddy) { + for (unsigned i = 0; i < num_deriv_comp * 2; i++) + address[count++] = derivs[i]; } - if (ctx->output_mask & (1ull << VARYING_SLOT_LAYER)) { - outinfo->writes_layer = true; - layer_value = radv_load_output(ctx, VARYING_SLOT_LAYER, 0); - } + /* Pack texture coordinates */ + if (coord) { + address[count++] = coords[0]; + if (instr->coord_components > 1) { + if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) { + coords[1] = apply_round_slice(&ctx->ac, coords[1]); + } + address[count++] = coords[1]; + } + if (instr->coord_components > 2) { + if ((instr->sampler_dim == GLSL_SAMPLER_DIM_2D || + instr->sampler_dim == GLSL_SAMPLER_DIM_MS || + instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS || + instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) && + instr->is_array && + instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) { + coords[2] = 
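apply_round_slice, defined earlier in this hunk and called above for 1D and 2D array coordinates, snaps the layer coordinate to the nearest integer before it is packed into the address. A scalar equivalent of what llvm.rint.f32 computes under the default rounding mode:

    #include <math.h>

    /* Array layer selection: round to nearest, ties to even. */
    static float round_slice(float layer)
    {
            return rintf(layer);
    }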
apply_round_slice(&ctx->ac, coords[2]); + } + address[count++] = coords[2]; + } - if (ctx->output_mask & (1ull << VARYING_SLOT_VIEWPORT)) { - outinfo->writes_viewport_index = true; - viewport_index_value = radv_load_output(ctx, VARYING_SLOT_VIEWPORT, 0); - } + if (ctx->ac.chip_class >= GFX9) { + LLVMValueRef filler; + if (instr->op == nir_texop_txf) + filler = ctx->ac.i32_0; + else + filler = LLVMConstReal(ctx->ac.f32, 0.5); - if (outinfo->writes_pointsize || - outinfo->writes_layer || - outinfo->writes_viewport_index) { - pos_args[1].enabled_channels = ((outinfo->writes_pointsize == true ? 1 : 0) | - (outinfo->writes_layer == true ? 4 : 0)); - pos_args[1].valid_mask = 0; - pos_args[1].done = 0; - pos_args[1].target = V_008DFC_SQ_EXP_POS + 1; - pos_args[1].compr = 0; - pos_args[1].out[0] = ctx->ac.f32_0; /* X */ - pos_args[1].out[1] = ctx->ac.f32_0; /* Y */ - pos_args[1].out[2] = ctx->ac.f32_0; /* Z */ - pos_args[1].out[3] = ctx->ac.f32_0; /* W */ - - if (outinfo->writes_pointsize == true) - pos_args[1].out[0] = psize_value; - if (outinfo->writes_layer == true) - pos_args[1].out[2] = layer_value; - if (outinfo->writes_viewport_index == true) { - if (ctx->options->chip_class >= GFX9) { - /* GFX9 has the layer in out.z[10:0] and the viewport - * index in out.z[19:16]. - */ - LLVMValueRef v = viewport_index_value; - v = ac_to_integer(&ctx->ac, v); - v = LLVMBuildShl(ctx->ac.builder, v, - LLVMConstInt(ctx->ac.i32, 16, false), - ""); - v = LLVMBuildOr(ctx->ac.builder, v, - ac_to_integer(&ctx->ac, pos_args[1].out[2]), ""); - - pos_args[1].out[2] = ac_to_float(&ctx->ac, v); - pos_args[1].enabled_channels |= 1 << 2; - } else { - pos_args[1].out[3] = viewport_index_value; - pos_args[1].enabled_channels |= 1 << 3; + if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D) { + /* No nir_texop_lod, because it does not take a slice + * even with array textures. 
*/ + if (instr->is_array && instr->op != nir_texop_lod ) { + address[count] = address[count - 1]; + address[count - 1] = filler; + count++; + } else + address[count++] = filler; } } } - for (i = 0; i < 4; i++) { - if (pos_args[i].out[0]) - num_pos_exports++; - } - - pos_idx = 0; - for (i = 0; i < 4; i++) { - if (!pos_args[i].out[0]) - continue; - /* Specify the target we are exporting */ - pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++; - if (pos_idx == num_pos_exports) - pos_args[i].done = 1; - ac_build_export(&ctx->ac, &pos_args[i]); + /* Pack LOD */ + if (lod && ((instr->op == nir_texop_txl || instr->op == nir_texop_txf) && !lod_is_zero)) { + address[count++] = lod; + } else if (instr->op == nir_texop_txf_ms && sample_index) { + address[count++] = sample_index; + } else if(instr->op == nir_texop_txs) { + count = 0; + if (lod) + address[count++] = lod; + else + address[count++] = ctx->ac.i32_0; } - for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - LLVMValueRef values[4]; - if (!(ctx->output_mask & (1ull << i))) - continue; - - if (i != VARYING_SLOT_LAYER && - i != VARYING_SLOT_PRIMITIVE_ID && - i < VARYING_SLOT_VAR0) - continue; + for (chan = 0; chan < count; chan++) { + address[chan] = LLVMBuildBitCast(ctx->ac.builder, + address[chan], ctx->ac.i32, ""); + } - for (unsigned j = 0; j < 4; j++) - values[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j)); + if (instr->op == nir_texop_samples_identical) { + LLVMValueRef txf_address[4]; + struct ac_image_args txf_args = { 0 }; + unsigned txf_count = count; + memcpy(txf_address, address, sizeof(txf_address)); - unsigned output_usage_mask; + if (!instr->is_array) + txf_address[2] = ctx->ac.i32_0; + txf_address[3] = ctx->ac.i32_0; - if (ctx->stage == MESA_SHADER_VERTEX && - !ctx->is_gs_copy_shader) { - output_usage_mask = - ctx->shader_info->info.vs.output_usage_mask[i]; - } else if (ctx->stage == MESA_SHADER_TESS_EVAL) { - output_usage_mask = - ctx->shader_info->info.tes.output_usage_mask[i]; - } else { - /* Enable all channels for the GS copy shader because - * we don't know the output usage mask currently. - */ - output_usage_mask = 0xf; - } + set_tex_fetch_args(&ctx->ac, &txf_args, instr, nir_texop_txf, + fmask_ptr, NULL, + txf_address, txf_count, 0xf); - radv_export_param(ctx, param_count, values, output_usage_mask); + result = build_tex_intrinsic(ctx, instr, false, &txf_args); - outinfo->vs_output_param_offset[i] = param_count++; + result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); + result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0); + goto write_result; } - if (export_prim_id) { - LLVMValueRef values[4]; - - values[0] = ctx->vs_prim_id; - ctx->shader_info->vs.vgpr_comp_cnt = MAX2(2, - ctx->shader_info->vs.vgpr_comp_cnt); - for (unsigned j = 1; j < 4; j++) - values[j] = ctx->ac.f32_0; - - radv_export_param(ctx, param_count, values, 0xf); - - outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count++; - outinfo->export_prim_id = true; + if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS && + instr->op != nir_texop_txs) { + unsigned sample_chan = instr->is_array ? 3 : 2; + address[sample_chan] = adjust_sample_index_using_fmask(&ctx->ac, + address[0], + address[1], + instr->is_array ? 
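To summarize the GFX9 special case handled above: 1D textures are addressed as 2D, so a filler y coordinate is spliced into the address vector, and for array textures it has to sit between x and the slice index:

    /* GFX9 1D addressing after the fixup above (schematic):
     *   non-array: { x, filler }
     *   array:     { x, filler, slice }
     * where filler is integer 0 for nir_texop_txf and 0.5 for filtered ops. */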
address[2] : NULL, + address[sample_chan], + fmask_ptr); } - outinfo->pos_exports = num_pos_exports; - outinfo->param_exports = param_count; -} - -static void -handle_es_outputs_post(struct radv_shader_context *ctx, - struct ac_es_output_info *outinfo) -{ - int j; - uint64_t max_output_written = 0; - LLVMValueRef lds_base = NULL; - - for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - int param_index; - int length = 4; - - if (!(ctx->output_mask & (1ull << i))) - continue; - - if (i == VARYING_SLOT_CLIP_DIST0) - length = ctx->num_output_clips + ctx->num_output_culls; - - param_index = shader_io_get_unique_index(i); + if (offsets && instr->op == nir_texop_txf) { + nir_const_value *const_offset = + nir_src_as_const_value(instr->src[const_src].src); + int num_offsets = instr->src[const_src].src.ssa->num_components; + assert(const_offset); + num_offsets = MIN2(num_offsets, instr->coord_components); + if (num_offsets > 2) + address[2] = LLVMBuildAdd(ctx->ac.builder, + address[2], LLVMConstInt(ctx->ac.i32, const_offset->i32[2], false), ""); + if (num_offsets > 1) + address[1] = LLVMBuildAdd(ctx->ac.builder, + address[1], LLVMConstInt(ctx->ac.i32, const_offset->i32[1], false), ""); + address[0] = LLVMBuildAdd(ctx->ac.builder, + address[0], LLVMConstInt(ctx->ac.i32, const_offset->i32[0], false), ""); - max_output_written = MAX2(param_index + (length > 4), max_output_written); } - outinfo->esgs_itemsize = (max_output_written + 1) * 16; - - if (ctx->ac.chip_class >= GFX9) { - unsigned itemsize_dw = outinfo->esgs_itemsize / 4; - LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac); - LLVMValueRef wave_idx = ac_build_bfe(&ctx->ac, ctx->merged_wave_info, - LLVMConstInt(ctx->ac.i32, 24, false), - LLVMConstInt(ctx->ac.i32, 4, false), false); - vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx, - LLVMBuildMul(ctx->ac.builder, wave_idx, - LLVMConstInt(ctx->ac.i32, 64, false), ""), ""); - lds_base = LLVMBuildMul(ctx->ac.builder, vertex_idx, - LLVMConstInt(ctx->ac.i32, itemsize_dw, 0), ""); + /* TODO TG4 support */ + if (instr->op == nir_texop_tg4) { + if (instr->is_shadow) + dmask = 1; + else + dmask = 1 << instr->component; } + set_tex_fetch_args(&ctx->ac, &args, instr, instr->op, + res_ptr, samp_ptr, address, count, dmask); - for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - LLVMValueRef dw_addr = NULL; - LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4]; - int param_index; - int length = 4; - - if (!(ctx->output_mask & (1ull << i))) - continue; - - if (i == VARYING_SLOT_CLIP_DIST0) - length = ctx->num_output_clips + ctx->num_output_culls; + result = build_tex_intrinsic(ctx, instr, lod_is_zero, &args); - param_index = shader_io_get_unique_index(i); + if (instr->op == nir_texop_query_levels) + result = LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), ""); + else if (instr->is_shadow && instr->is_new_style_shadow && + instr->op != nir_texop_txs && instr->op != nir_texop_lod && + instr->op != nir_texop_tg4) + result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); + else if (instr->op == nir_texop_txs && + instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && + instr->is_array) { + LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false); + LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false); + LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, ""); + z = LLVMBuildSDiv(ctx->ac.builder, z, six, ""); + result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, ""); + } else if (ctx->ac.chip_class >= GFX9 && + instr->op == 
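adjust_sample_index_using_fmask, defined earlier in this file, remaps the logical sample index through the FMASK surface before the multisample fetch. A sketch of the idea, assuming the common 4-bits-per-sample FMASK layout (the field width actually depends on the surface format):

    #include <stdint.h>

    /* Each FMASK texel packs one small index per sample; the logical
     * sample id selects the nibble holding the physical fragment slot. */
    static unsigned remap_sample(uint32_t fmask_texel, unsigned sample)
    {
            return (fmask_texel >> (sample * 4)) & 0xf;
    }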
nir_texop_txs &&
+ instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
+ instr->is_array) {
+ LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
+ LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
+ result = LLVMBuildInsertElement(ctx->ac.builder, result, layers,
+ ctx->ac.i32_1, "");
+ } else if (instr->dest.ssa.num_components != 4)
+ result = ac_trim_vector(&ctx->ac, result, instr->dest.ssa.num_components);

- if (lds_base) {
- dw_addr = LLVMBuildAdd(ctx->ac.builder, lds_base,
- LLVMConstInt(ctx->ac.i32, param_index * 4, false),
- "");
- }
- for (j = 0; j < length; j++) {
- LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], "");
- out_val = LLVMBuildBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
-
- if (ctx->ac.chip_class >= GFX9) {
- ac_lds_store(&ctx->ac, dw_addr,
- LLVMBuildLoad(ctx->ac.builder, out_ptr[j], ""));
- dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, ctx->ac.i32_1, "");
- } else {
- ac_build_buffer_store_dword(&ctx->ac,
- ctx->esgs_ring,
- out_val, 1,
- NULL, ctx->es2gs_offset,
- (4 * param_index + j) * 4,
- 1, 1, true, true);
- }
- }
 }

+write_result:
+ if (result) {
+ assert(instr->dest.is_ssa);
+ result = ac_to_integer(&ctx->ac, result);
+ _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
+ }
 }

-static void
-handle_ls_outputs_post(struct radv_shader_context *ctx)
-{
- LLVMValueRef vertex_id = ctx->rel_auto_id;
- LLVMValueRef vertex_dw_stride = ac_unpack_param(&ctx->ac, ctx->ls_out_layout, 13, 8);
- LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id,
- vertex_dw_stride, "");
-
- for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
- LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4];
- int length = 4;
- if (!(ctx->output_mask & (1ull << i)))
- continue;

- if (i == VARYING_SLOT_CLIP_DIST0)
- length = ctx->num_output_clips + ctx->num_output_culls;
- int param = shader_io_get_unique_index(i);
- mark_tess_output(ctx, false, param);
- if (length > 4)
- mark_tess_output(ctx, false, param + 1);
- LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr,
- LLVMConstInt(ctx->ac.i32, param * 4, false),
- "");
- for (unsigned j = 0; j < length; j++) {
- ac_lds_store(&ctx->ac, dw_addr,
- LLVMBuildLoad(ctx->ac.builder, out_ptr[j], ""));
- dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, ctx->ac.i32_1, "");
- }
- }
-}

+static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr)
+{
+ LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
+ LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, "");

+ _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
+ _mesa_hash_table_insert(ctx->phis, instr, result);
+}

-struct ac_build_if_state
-{
- struct radv_shader_context *ctx;
- LLVMValueRef condition;
- LLVMBasicBlockRef entry_block;
- LLVMBasicBlockRef true_block;
- LLVMBasicBlockRef false_block;
- LLVMBasicBlockRef merge_block;
-};
+static void visit_post_phi(struct ac_nir_context *ctx,
+ nir_phi_instr *instr,
+ LLVMValueRef llvm_phi)
+{
+ nir_foreach_phi_src(src, instr) {
+ LLVMBasicBlockRef block = get_block(ctx, src->pred);
+ LLVMValueRef llvm_src = get_src(ctx, src->src);

+ LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);
+ }
+}

-static LLVMBasicBlockRef
-ac_build_insert_new_block(struct radv_shader_context *ctx, const char *name)
-{
- LLVMBasicBlockRef current_block;
- LLVMBasicBlockRef next_block;
- LLVMBasicBlockRef new_block;
-
- /* get current basic block */
- current_block = LLVMGetInsertBlock(ctx->ac.builder);
-
- /* check if there's another block after this one */
- next_block = LLVMGetNextBasicBlock(current_block);
- if (next_block) {
- 
/* insert the new block before the next block */ - new_block = LLVMInsertBasicBlockInContext(ctx->context, next_block, name); - } - else { - /* append new block after current block */ - LLVMValueRef function = LLVMGetBasicBlockParent(current_block); - new_block = LLVMAppendBasicBlockInContext(ctx->context, function, name); + LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1); } - return new_block; } -static void -ac_nir_build_if(struct ac_build_if_state *ifthen, - struct radv_shader_context *ctx, - LLVMValueRef condition) +static void phi_post_pass(struct ac_nir_context *ctx) { - LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->ac.builder); - - memset(ifthen, 0, sizeof *ifthen); - ifthen->ctx = ctx; - ifthen->condition = condition; - ifthen->entry_block = block; - - /* create endif/merge basic block for the phi functions */ - ifthen->merge_block = ac_build_insert_new_block(ctx, "endif-block"); - - /* create/insert true_block before merge_block */ - ifthen->true_block = - LLVMInsertBasicBlockInContext(ctx->context, - ifthen->merge_block, - "if-true-block"); - - /* successive code goes into the true block */ - LLVMPositionBuilderAtEnd(ctx->ac.builder, ifthen->true_block); + struct hash_entry *entry; + hash_table_foreach(ctx->phis, entry) { + visit_post_phi(ctx, (nir_phi_instr*)entry->key, + (LLVMValueRef)entry->data); + } } -/** - * End a conditional. - */ -static void -ac_nir_build_endif(struct ac_build_if_state *ifthen) -{ - LLVMBuilderRef builder = ifthen->ctx->ac.builder; - - /* Insert branch to the merge block from current block */ - LLVMBuildBr(builder, ifthen->merge_block); - /* - * Now patch in the various branch instructions. - */ +static void visit_ssa_undef(struct ac_nir_context *ctx, + const nir_ssa_undef_instr *instr) +{ + unsigned num_components = instr->def.num_components; + LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size); + LLVMValueRef undef; - /* Insert the conditional branch instruction at the end of entry_block */ - LLVMPositionBuilderAtEnd(builder, ifthen->entry_block); - if (ifthen->false_block) { - /* we have an else clause */ - LLVMBuildCondBr(builder, ifthen->condition, - ifthen->true_block, ifthen->false_block); - } + if (num_components == 1) + undef = LLVMGetUndef(type); else { - /* no else clause */ - LLVMBuildCondBr(builder, ifthen->condition, - ifthen->true_block, ifthen->merge_block); + undef = LLVMGetUndef(LLVMVectorType(type, num_components)); } - - /* Resume building code at end of the ifthen->merge_block */ - LLVMPositionBuilderAtEnd(builder, ifthen->merge_block); + _mesa_hash_table_insert(ctx->defs, &instr->def, undef); } -static void -write_tess_factors(struct radv_shader_context *ctx) +static void visit_jump(struct ac_llvm_context *ctx, + const nir_jump_instr *instr) { - unsigned stride, outer_comps, inner_comps; - struct ac_build_if_state if_ctx, inner_if_ctx; - LLVMValueRef invocation_id = ac_unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 8, 5); - LLVMValueRef rel_patch_id = ac_unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 0, 8); - unsigned tess_inner_index = 0, tess_outer_index; - LLVMValueRef lds_base, lds_inner = NULL, lds_outer, byteoffset, buffer; - LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4]; - int i; - ac_emit_barrier(&ctx->ac, ctx->stage); - - switch (ctx->options->key.tcs.primitive_mode) { - case GL_ISOLINES: - stride = 2; - outer_comps = 2; - inner_comps = 0; - break; - case GL_TRIANGLES: - stride = 4; - outer_comps = 3; - inner_comps = 1; + switch (instr->type) { + case nir_jump_break: + 
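/* The phi handling above is deliberately two-pass: visit_phi creates an
 * empty phi node, because some predecessor blocks may not have been
 * translated yet, and phi_post_pass adds the incoming edges once every
 * block exists.  A minimal sketch of the same pattern against the plain
 * LLVM-C API (hypothetical helpers, values and blocks): */
static LLVMValueRef build_empty_phi(LLVMBuilderRef builder, LLVMTypeRef type)
{
	/* pass 1: create the phi with no incoming edges yet */
	return LLVMBuildPhi(builder, type, "");
}

static void wire_phi_edge(LLVMValueRef phi, LLVMValueRef src_value,
			  LLVMBasicBlockRef pred_block)
{
	/* pass 2: LLVMAddIncoming may be called long after LLVMBuildPhi,
	 * once the predecessor block has been emitted */
	LLVMAddIncoming(phi, &src_value, &pred_block, 1);
}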
ac_build_break(ctx); break; - case GL_QUADS: - stride = 6; - outer_comps = 4; - inner_comps = 2; + case nir_jump_continue: + ac_build_continue(ctx); break; default: - return; - } - - ac_nir_build_if(&if_ctx, ctx, - LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, - invocation_id, ctx->ac.i32_0, "")); - - lds_base = get_tcs_out_current_patch_data_offset(ctx); - - if (inner_comps) { - tess_inner_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER); - mark_tess_output(ctx, true, tess_inner_index); - lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base, - LLVMConstInt(ctx->ac.i32, tess_inner_index * 4, false), ""); + fprintf(stderr, "Unknown NIR jump instr: "); + nir_print_instr(&instr->instr, stderr); + fprintf(stderr, "\n"); + abort(); } +} - tess_outer_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER); - mark_tess_output(ctx, true, tess_outer_index); - lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base, - LLVMConstInt(ctx->ac.i32, tess_outer_index * 4, false), ""); - - for (i = 0; i < 4; i++) { - inner[i] = LLVMGetUndef(ctx->ac.i32); - outer[i] = LLVMGetUndef(ctx->ac.i32); - } +static void visit_cf_list(struct ac_nir_context *ctx, + struct exec_list *list); - // LINES reverseal - if (ctx->options->key.tcs.primitive_mode == GL_ISOLINES) { - outer[0] = out[1] = ac_lds_load(&ctx->ac, lds_outer); - lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_outer, - ctx->ac.i32_1, ""); - outer[1] = out[0] = ac_lds_load(&ctx->ac, lds_outer); - } else { - for (i = 0; i < outer_comps; i++) { - outer[i] = out[i] = - ac_lds_load(&ctx->ac, lds_outer); - lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_outer, - ctx->ac.i32_1, ""); - } - for (i = 0; i < inner_comps; i++) { - inner[i] = out[outer_comps+i] = - ac_lds_load(&ctx->ac, lds_inner); - lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_inner, - ctx->ac.i32_1, ""); +static void visit_block(struct ac_nir_context *ctx, nir_block *block) +{ + LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->ac.builder); + nir_foreach_instr(instr, block) + { + switch (instr->type) { + case nir_instr_type_alu: + visit_alu(ctx, nir_instr_as_alu(instr)); + break; + case nir_instr_type_load_const: + visit_load_const(ctx, nir_instr_as_load_const(instr)); + break; + case nir_instr_type_intrinsic: + visit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); + break; + case nir_instr_type_tex: + visit_tex(ctx, nir_instr_as_tex(instr)); + break; + case nir_instr_type_phi: + visit_phi(ctx, nir_instr_as_phi(instr)); + break; + case nir_instr_type_ssa_undef: + visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr)); + break; + case nir_instr_type_jump: + visit_jump(&ctx->ac, nir_instr_as_jump(instr)); + break; + default: + fprintf(stderr, "Unknown NIR instr type: "); + nir_print_instr(instr, stderr); + fprintf(stderr, "\n"); + abort(); } } - /* Convert the outputs to vectors for stores. 
*/ - vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4)); - vec1 = NULL; + _mesa_hash_table_insert(ctx->defs, block, llvm_block); +} - if (stride > 4) - vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4); +static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt) +{ + LLVMValueRef value = get_src(ctx, if_stmt->condition); + nir_block *then_block = + (nir_block *) exec_list_get_head(&if_stmt->then_list); - buffer = ctx->hs_ring_tess_factor; - tf_base = ctx->tess_factor_offset; - byteoffset = LLVMBuildMul(ctx->ac.builder, rel_patch_id, - LLVMConstInt(ctx->ac.i32, 4 * stride, false), ""); - unsigned tf_offset = 0; + ac_build_uif(&ctx->ac, value, then_block->index); - if (ctx->options->chip_class <= VI) { - ac_nir_build_if(&inner_if_ctx, ctx, - LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, - rel_patch_id, ctx->ac.i32_0, "")); + visit_cf_list(ctx, &if_stmt->then_list); - /* Store the dynamic HS control word. */ - ac_build_buffer_store_dword(&ctx->ac, buffer, - LLVMConstInt(ctx->ac.i32, 0x80000000, false), - 1, ctx->ac.i32_0, tf_base, - 0, 1, 0, true, false); - tf_offset += 4; + if (!exec_list_is_empty(&if_stmt->else_list)) { + nir_block *else_block = + (nir_block *) exec_list_get_head(&if_stmt->else_list); - ac_nir_build_endif(&inner_if_ctx); + ac_build_else(&ctx->ac, else_block->index); + visit_cf_list(ctx, &if_stmt->else_list); } - /* Store the tessellation factors. */ - ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, - MIN2(stride, 4), byteoffset, tf_base, - tf_offset, 1, 0, true, false); - if (vec1) - ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, - stride - 4, byteoffset, tf_base, - 16 + tf_offset, 1, 0, true, false); - - //store to offchip for TES to read - only if TES reads them - if (ctx->options->key.tcs.tes_reads_tess_factors) { - LLVMValueRef inner_vec, outer_vec, tf_outer_offset; - LLVMValueRef tf_inner_offset; - unsigned param_outer, param_inner; - - param_outer = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER); - tf_outer_offset = get_tcs_tes_buffer_address(ctx, NULL, - LLVMConstInt(ctx->ac.i32, param_outer, 0)); - - outer_vec = ac_build_gather_values(&ctx->ac, outer, - util_next_power_of_two(outer_comps)); - - ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, outer_vec, - outer_comps, tf_outer_offset, - ctx->oc_lds, 0, 1, 0, true, false); - if (inner_comps) { - param_inner = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER); - tf_inner_offset = get_tcs_tes_buffer_address(ctx, NULL, - LLVMConstInt(ctx->ac.i32, param_inner, 0)); - - inner_vec = inner_comps == 1 ? 
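/* For reference, the tess-factor buffer layout driven by the primitive
 * mode switch above: isolines write 2 outer factors, triangles 3 outer
 * + 1 inner, quads 4 outer + 2 inner, giving a per-patch stride of 2, 4
 * or 6 dwords.  A plain-C sketch (hypothetical helper) of the byte
 * offset computed from rel_patch_id: */
static unsigned tf_patch_offset_bytes(unsigned rel_patch_id,
				      unsigned outer_comps,
				      unsigned inner_comps)
{
	unsigned stride_dw = outer_comps + inner_comps; /* 2, 4 or 6 */
	return rel_patch_id * stride_dw * 4;            /* dwords -> bytes */
}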
inner[0] : - ac_build_gather_values(&ctx->ac, inner, inner_comps); - ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, inner_vec, - inner_comps, tf_inner_offset, - ctx->oc_lds, 0, 1, 0, true, false); - } - } - ac_nir_build_endif(&if_ctx); + ac_build_endif(&ctx->ac, then_block->index); } -static void -handle_tcs_outputs_post(struct radv_shader_context *ctx) +static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop) { - write_tess_factors(ctx); -} + nir_block *first_loop_block = + (nir_block *) exec_list_get_head(&loop->body); -static bool -si_export_mrt_color(struct radv_shader_context *ctx, - LLVMValueRef *color, unsigned index, - struct ac_export_args *args) -{ - /* Export */ - si_llvm_init_export_args(ctx, color, 0xf, - V_008DFC_SQ_EXP_MRT + index, args); - if (!args->enabled_channels) - return false; /* unnecessary NULL export */ + ac_build_bgnloop(&ctx->ac, first_loop_block->index); + + visit_cf_list(ctx, &loop->body); - return true; + ac_build_endloop(&ctx->ac, first_loop_block->index); } -static void -radv_export_mrt_z(struct radv_shader_context *ctx, - LLVMValueRef depth, LLVMValueRef stencil, - LLVMValueRef samplemask) +static void visit_cf_list(struct ac_nir_context *ctx, + struct exec_list *list) { - struct ac_export_args args; + foreach_list_typed(nir_cf_node, node, node, list) + { + switch (node->type) { + case nir_cf_node_block: + visit_block(ctx, nir_cf_node_as_block(node)); + break; + + case nir_cf_node_if: + visit_if(ctx, nir_cf_node_as_if(node)); + break; - ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args); + case nir_cf_node_loop: + visit_loop(ctx, nir_cf_node_as_loop(node)); + break; - ac_build_export(&ctx->ac, &args); + default: + assert(0); + } + } } -static void -handle_fs_outputs_post(struct radv_shader_context *ctx) +void +ac_handle_shader_output_decl(struct ac_llvm_context *ctx, + struct ac_shader_abi *abi, + struct nir_shader *nir, + struct nir_variable *variable, + gl_shader_stage stage) { - unsigned index = 0; - LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL; - struct ac_export_args color_args[8]; - - for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - LLVMValueRef values[4]; - - if (!(ctx->output_mask & (1ull << i))) - continue; - - if (i < FRAG_RESULT_DATA0) - continue; + unsigned output_loc = variable->data.driver_location / 4; + unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); - for (unsigned j = 0; j < 4; j++) - values[j] = ac_to_float(&ctx->ac, - radv_load_output(ctx, i, j)); + /* tess ctrl has it's own load/store paths for outputs */ + if (stage == MESA_SHADER_TESS_CTRL) + return; - bool ret = si_export_mrt_color(ctx, values, - i - FRAG_RESULT_DATA0, - &color_args[index]); - if (ret) - index++; - } + if (stage == MESA_SHADER_VERTEX || + stage == MESA_SHADER_TESS_EVAL || + stage == MESA_SHADER_GEOMETRY) { + int idx = variable->data.location + variable->data.index; + if (idx == VARYING_SLOT_CLIP_DIST0) { + int length = nir->info.clip_distance_array_size + + nir->info.cull_distance_array_size; - /* Process depth, stencil, samplemask. 
*/ - if (ctx->shader_info->info.ps.writes_z) { - depth = ac_to_float(&ctx->ac, - radv_load_output(ctx, FRAG_RESULT_DEPTH, 0)); - } - if (ctx->shader_info->info.ps.writes_stencil) { - stencil = ac_to_float(&ctx->ac, - radv_load_output(ctx, FRAG_RESULT_STENCIL, 0)); - } - if (ctx->shader_info->info.ps.writes_sample_mask) { - samplemask = ac_to_float(&ctx->ac, - radv_load_output(ctx, FRAG_RESULT_SAMPLE_MASK, 0)); + if (length > 4) + attrib_count = 2; + else + attrib_count = 1; + } } - /* Set the DONE bit on last non-null color export only if Z isn't - * exported. - */ - if (index > 0 && - !ctx->shader_info->info.ps.writes_z && - !ctx->shader_info->info.ps.writes_stencil && - !ctx->shader_info->info.ps.writes_sample_mask) { - unsigned last = index - 1; - - color_args[last].valid_mask = 1; /* whether the EXEC mask is valid */ - color_args[last].done = 1; /* DONE bit */ + for (unsigned i = 0; i < attrib_count; ++i) { + for (unsigned chan = 0; chan < 4; chan++) { + abi->outputs[radeon_llvm_reg_index_soa(output_loc + i, chan)] = + ac_build_alloca_undef(ctx, ctx->f32, ""); + } } - - /* Export PS outputs. */ - for (unsigned i = 0; i < index; i++) - ac_build_export(&ctx->ac, &color_args[i]); - - if (depth || stencil || samplemask) - radv_export_mrt_z(ctx, depth, stencil, samplemask); - else if (!index) - ac_build_export_null(&ctx->ac); -} - -static void -emit_gs_epilogue(struct radv_shader_context *ctx) -{ - ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, ctx->gs_wave_id); } -static void -handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs, - LLVMValueRef *addrs) +static LLVMTypeRef +glsl_base_to_llvm_type(struct ac_llvm_context *ac, + enum glsl_base_type type) { - struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); - - switch (ctx->stage) { - case MESA_SHADER_VERTEX: - if (ctx->options->key.vs.as_ls) - handle_ls_outputs_post(ctx); - else if (ctx->options->key.vs.as_es) - handle_es_outputs_post(ctx, &ctx->shader_info->vs.es_info); - else - handle_vs_outputs_post(ctx, ctx->options->key.vs.export_prim_id, - &ctx->shader_info->vs.outinfo); - break; - case MESA_SHADER_FRAGMENT: - handle_fs_outputs_post(ctx); - break; - case MESA_SHADER_GEOMETRY: - emit_gs_epilogue(ctx); - break; - case MESA_SHADER_TESS_CTRL: - handle_tcs_outputs_post(ctx); - break; - case MESA_SHADER_TESS_EVAL: - if (ctx->options->key.tes.as_es) - handle_es_outputs_post(ctx, &ctx->shader_info->tes.es_info); - else - handle_vs_outputs_post(ctx, ctx->options->key.tes.export_prim_id, - &ctx->shader_info->tes.outinfo); - break; + switch (type) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_SUBROUTINE: + return ac->i32; + case GLSL_TYPE_FLOAT: /* TODO handle mediump */ + return ac->f32; + case GLSL_TYPE_INT64: + case GLSL_TYPE_UINT64: + return ac->i64; + case GLSL_TYPE_DOUBLE: + return ac->f64; default: - break; + unreachable("unknown GLSL type"); } } -static void ac_llvm_finalize_module(struct radv_shader_context *ctx) -{ - LLVMPassManagerRef passmgr; - /* Create the pass manager */ - passmgr = LLVMCreateFunctionPassManagerForModule( - ctx->ac.module); - - /* This pass should eliminate all the load and store instructions */ - LLVMAddPromoteMemoryToRegisterPass(passmgr); - - /* Add some optimization passes */ - LLVMAddScalarReplAggregatesPass(passmgr); - LLVMAddLICMPass(passmgr); - LLVMAddAggressiveDCEPass(passmgr); - LLVMAddCFGSimplificationPass(passmgr); - LLVMAddInstructionCombiningPass(passmgr); - - /* Run the pass */ - 
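/* On the export rules used by handle_fs_outputs_post above: the final
 * export of a fragment shader must carry the DONE bit, so it goes on the
 * last color export only when no depth/stencil/samplemask export
 * follows, and a null export is emitted when nothing is written at all.
 * A plain-C model (hypothetical helper) of that decision: */
static int color_export_carries_done(unsigned num_color_exports,
				     int writes_z, int writes_stencil,
				     int writes_sample_mask)
{
	/* otherwise the depth/stencil/samplemask export ends the shader */
	return num_color_exports > 0 &&
	       !writes_z && !writes_stencil && !writes_sample_mask;
}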
LLVMInitializeFunctionPassManager(passmgr); - LLVMRunFunctionPassManager(passmgr, ctx->main_function); - LLVMFinalizeFunctionPassManager(passmgr); - - LLVMDisposeBuilder(ctx->ac.builder); - LLVMDisposePassManager(passmgr); - - ac_llvm_context_dispose(&ctx->ac); -} - -static void -ac_nir_eliminate_const_vs_outputs(struct radv_shader_context *ctx) +static LLVMTypeRef +glsl_to_llvm_type(struct ac_llvm_context *ac, + const struct glsl_type *type) { - struct ac_vs_output_info *outinfo; - - switch (ctx->stage) { - case MESA_SHADER_FRAGMENT: - case MESA_SHADER_COMPUTE: - case MESA_SHADER_TESS_CTRL: - case MESA_SHADER_GEOMETRY: - return; - case MESA_SHADER_VERTEX: - if (ctx->options->key.vs.as_ls || - ctx->options->key.vs.as_es) - return; - outinfo = &ctx->shader_info->vs.outinfo; - break; - case MESA_SHADER_TESS_EVAL: - if (ctx->options->key.vs.as_es) - return; - outinfo = &ctx->shader_info->tes.outinfo; - break; - default: - unreachable("Unhandled shader type"); + if (glsl_type_is_scalar(type)) { + return glsl_base_to_llvm_type(ac, glsl_get_base_type(type)); } - ac_optimize_vs_outputs(&ctx->ac, - ctx->main_function, - outinfo->vs_output_param_offset, - VARYING_SLOT_MAX, - &outinfo->param_exports); -} + if (glsl_type_is_vector(type)) { + return LLVMVectorType( + glsl_base_to_llvm_type(ac, glsl_get_base_type(type)), + glsl_get_vector_elements(type)); + } -static void -ac_setup_rings(struct radv_shader_context *ctx) -{ - if ((ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) || - (ctx->stage == MESA_SHADER_TESS_EVAL && ctx->options->key.tes.as_es)) { - ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_VS, false)); + if (glsl_type_is_matrix(type)) { + return LLVMArrayType( + glsl_to_llvm_type(ac, glsl_get_column_type(type)), + glsl_get_matrix_columns(type)); } - if (ctx->is_gs_copy_shader) { - ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_GSVS_VS, false)); + if (glsl_type_is_array(type)) { + return LLVMArrayType( + glsl_to_llvm_type(ac, glsl_get_array_element(type)), + glsl_get_length(type)); } - if (ctx->stage == MESA_SHADER_GEOMETRY) { - LLVMValueRef tmp; - ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_GS, false)); - ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_GSVS_GS, false)); - ctx->gsvs_ring = LLVMBuildBitCast(ctx->ac.builder, ctx->gsvs_ring, ctx->ac.v4i32, ""); + assert(glsl_type_is_struct(type)); - ctx->gsvs_ring = LLVMBuildInsertElement(ctx->ac.builder, ctx->gsvs_ring, ctx->gsvs_num_entries, LLVMConstInt(ctx->ac.i32, 2, false), ""); - tmp = LLVMBuildExtractElement(ctx->ac.builder, ctx->gsvs_ring, ctx->ac.i32_1, ""); - tmp = LLVMBuildOr(ctx->ac.builder, tmp, ctx->gsvs_ring_stride, ""); - ctx->gsvs_ring = LLVMBuildInsertElement(ctx->ac.builder, ctx->gsvs_ring, tmp, ctx->ac.i32_1, ""); - } + LLVMTypeRef member_types[glsl_get_length(type)]; - if (ctx->stage == MESA_SHADER_TESS_CTRL || - ctx->stage == MESA_SHADER_TESS_EVAL) { - ctx->hs_ring_tess_offchip = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_OFFCHIP, false)); - ctx->hs_ring_tess_factor = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_FACTOR, false)); + for (unsigned i = 0; i < glsl_get_length(type); i++) { + member_types[i] = + glsl_to_llvm_type(ac, + glsl_get_struct_field(type, i)); } + + return 
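/* The mapping implemented by glsl_base_to_llvm_type/glsl_to_llvm_type
 * above, worked through a few examples (LLVM types abbreviated):
 *
 *   float                 -> f32
 *   bool, int, uint       -> i32 (bools are widened)
 *   vec4                  -> <4 x f32>
 *   mat2x3                -> [2 x <3 x f32>]  (array of column vectors)
 *   float[8]              -> [8 x f32]
 *   struct { vec2; int; } -> { <2 x f32>, i32 }
 */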
LLVMStructTypeInContext(ac->context, member_types, + glsl_get_length(type), false); } -static unsigned -ac_nir_get_max_workgroup_size(enum chip_class chip_class, - const struct nir_shader *nir) +static void +setup_locals(struct ac_nir_context *ctx, + struct nir_function *func) { - switch (nir->info.stage) { - case MESA_SHADER_TESS_CTRL: - return chip_class >= CIK ? 128 : 64; - case MESA_SHADER_GEOMETRY: - return chip_class >= GFX9 ? 128 : 64; - case MESA_SHADER_COMPUTE: - break; - default: - return 0; + int i, j; + ctx->num_locals = 0; + nir_foreach_variable(variable, &func->impl->locals) { + unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); + variable->data.driver_location = ctx->num_locals * 4; + variable->data.location_frac = 0; + ctx->num_locals += attrib_count; } + ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef)); + if (!ctx->locals) + return; - unsigned max_workgroup_size = nir->info.cs.local_size[0] * - nir->info.cs.local_size[1] * - nir->info.cs.local_size[2]; - return max_workgroup_size; -} - -/* Fixup the HW not emitting the TCS regs if there are no HS threads. */ -static void ac_nir_fixup_ls_hs_input_vgprs(struct radv_shader_context *ctx) -{ - LLVMValueRef count = ac_build_bfe(&ctx->ac, ctx->merged_wave_info, - LLVMConstInt(ctx->ac.i32, 8, false), - LLVMConstInt(ctx->ac.i32, 8, false), false); - LLVMValueRef hs_empty = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, count, - ctx->ac.i32_0, ""); - ctx->abi.instance_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->rel_auto_id, ctx->abi.instance_id, ""); - ctx->vs_prim_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.vertex_id, ctx->vs_prim_id, ""); - ctx->rel_auto_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_rel_ids, ctx->rel_auto_id, ""); - ctx->abi.vertex_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_patch_id, ctx->abi.vertex_id, ""); + for (i = 0; i < ctx->num_locals; i++) { + for (j = 0; j < 4; j++) { + ctx->locals[i * 4 + j] = + ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, "temp"); + } + } } -static void prepare_gs_input_vgprs(struct radv_shader_context *ctx) +static void +setup_shared(struct ac_nir_context *ctx, + struct nir_shader *nir) { - for(int i = 5; i >= 0; --i) { - ctx->gs_vtx_offset[i] = ac_build_bfe(&ctx->ac, ctx->gs_vtx_offset[i & ~1], - LLVMConstInt(ctx->ac.i32, (i & 1) * 16, false), - LLVMConstInt(ctx->ac.i32, 16, false), false); + nir_foreach_variable(variable, &nir->shared) { + LLVMValueRef shared = + LLVMAddGlobalInAddressSpace( + ctx->ac.module, glsl_to_llvm_type(&ctx->ac, variable->type), + variable->name ? 
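/* ac_build_bfe, used heavily above, is the hardware bitfield extract
 * (s_bfe/v_bfe).  For the unsigned case it is equivalent to this plain C
 * (a sketch, assuming bits < 32): */
static unsigned bfe_u32(unsigned value, unsigned offset, unsigned bits)
{
	return (value >> offset) & ((1u << bits) - 1);
}
/* e.g. the HS wave count in ac_nir_fixup_ls_hs_input_vgprs above is
 * bfe_u32(merged_wave_info, 8, 8), and GFX9 packs two 16-bit vertex
 * offsets per VGPR, so gs_vtx_offset[3] is
 * bfe_u32(gs_vtx_offset[2], 16, 16). */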
variable->name : "", + AC_LOCAL_ADDR_SPACE); + _mesa_hash_table_insert(ctx->vars, variable, shared); } - - ctx->gs_wave_id = ac_build_bfe(&ctx->ac, ctx->merged_wave_info, - LLVMConstInt(ctx->ac.i32, 16, false), - LLVMConstInt(ctx->ac.i32, 8, false), false); } void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, @@ -6779,487 +3857,6 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, ralloc_free(ctx.vars); } -static -LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, - struct nir_shader *const *shaders, - int shader_count, - struct ac_shader_variant_info *shader_info, - const struct ac_nir_compiler_options *options, - bool dump_shader) -{ - struct radv_shader_context ctx = {0}; - unsigned i; - ctx.options = options; - ctx.shader_info = shader_info; - ctx.context = LLVMContextCreate(); - - ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class, - options->family); - ctx.ac.module = LLVMModuleCreateWithNameInContext("shader", ctx.context); - LLVMSetTarget(ctx.ac.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--"); - - LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm); - char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout); - LLVMSetDataLayout(ctx.ac.module, data_layout_str); - LLVMDisposeTargetData(data_layout); - LLVMDisposeMessage(data_layout_str); - - enum ac_float_mode float_mode = - options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH : - AC_FLOAT_MODE_DEFAULT; - - ctx.ac.builder = ac_create_builder(ctx.context, float_mode); - - memset(shader_info, 0, sizeof(*shader_info)); - - for(int i = 0; i < shader_count; ++i) - ac_nir_shader_info_pass(shaders[i], options, &shader_info->info); - - for (i = 0; i < AC_UD_MAX_SETS; i++) - shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1; - for (i = 0; i < AC_UD_MAX_UD; i++) - shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1; - - ctx.max_workgroup_size = 0; - for (int i = 0; i < shader_count; ++i) { - ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size, - ac_nir_get_max_workgroup_size(ctx.options->chip_class, - shaders[i])); - } - - create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2, - shader_count >= 2 ? 
shaders[shader_count - 2]->info.stage : MESA_SHADER_VERTEX); - - ctx.abi.inputs = &ctx.inputs[0]; - ctx.abi.emit_outputs = handle_shader_outputs_post; - ctx.abi.emit_vertex = visit_emit_vertex; - ctx.abi.load_ubo = radv_load_ubo; - ctx.abi.load_ssbo = radv_load_ssbo; - ctx.abi.load_sampler_desc = radv_get_sampler_desc; - ctx.abi.load_resource = radv_load_resource; - ctx.abi.clamp_shadow_reference = false; - - if (shader_count >= 2) - ac_init_exec_full_mask(&ctx.ac); - - if (ctx.ac.chip_class == GFX9 && - shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL) - ac_nir_fixup_ls_hs_input_vgprs(&ctx); - - for(int i = 0; i < shader_count; ++i) { - ctx.stage = shaders[i]->info.stage; - ctx.output_mask = 0; - ctx.tess_outputs_written = 0; - ctx.num_output_clips = shaders[i]->info.clip_distance_array_size; - ctx.num_output_culls = shaders[i]->info.cull_distance_array_size; - - if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) { - ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, ctx.ac.i32, "gs_next_vertex"); - ctx.gs_max_out_vertices = shaders[i]->info.gs.vertices_out; - ctx.abi.load_inputs = load_gs_input; - ctx.abi.emit_primitive = visit_end_primitive; - } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) { - ctx.tcs_outputs_read = shaders[i]->info.outputs_read; - ctx.tcs_patch_outputs_read = shaders[i]->info.patch_outputs_read; - ctx.abi.load_tess_varyings = load_tcs_varyings; - ctx.abi.load_patch_vertices_in = load_patch_vertices_in; - ctx.abi.store_tcs_outputs = store_tcs_output; - ctx.tcs_vertices_per_patch = shaders[i]->info.tess.tcs_vertices_out; - } else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) { - ctx.tes_primitive_mode = shaders[i]->info.tess.primitive_mode; - ctx.abi.load_tess_varyings = load_tes_input; - ctx.abi.load_tess_coord = load_tess_coord; - ctx.abi.load_patch_vertices_in = load_patch_vertices_in; - ctx.tcs_vertices_per_patch = shaders[i]->info.tess.tcs_vertices_out; - } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) { - if (shader_info->info.vs.needs_instance_id) { - if (ctx.options->key.vs.as_ls) { - ctx.shader_info->vs.vgpr_comp_cnt = - MAX2(2, ctx.shader_info->vs.vgpr_comp_cnt); - } else { - ctx.shader_info->vs.vgpr_comp_cnt = - MAX2(1, ctx.shader_info->vs.vgpr_comp_cnt); - } - } - ctx.abi.load_base_vertex = radv_load_base_vertex; - } else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) { - shader_info->fs.can_discard = shaders[i]->info.fs.uses_discard; - ctx.abi.lookup_interp_param = lookup_interp_param; - ctx.abi.load_sample_position = load_sample_position; - ctx.abi.load_sample_mask_in = load_sample_mask_in; - ctx.abi.emit_kill = radv_emit_kill; - } - - if (i) - ac_emit_barrier(&ctx.ac, ctx.stage); - - ac_setup_rings(&ctx); - - LLVMBasicBlockRef merge_block; - if (shader_count >= 2) { - LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder)); - LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, ""); - merge_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, ""); - - LLVMValueRef count = ac_build_bfe(&ctx.ac, ctx.merged_wave_info, - LLVMConstInt(ctx.ac.i32, 8 * i, false), - LLVMConstInt(ctx.ac.i32, 8, false), false); - LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac); - LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT, - thread_id, count, ""); - LLVMBuildCondBr(ctx.ac.builder, cond, then_block, merge_block); - - LLVMPositionBuilderAtEnd(ctx.ac.builder, then_block); - } - - if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) - handle_fs_inputs(&ctx, shaders[i]); 
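/* On GFX9 two merged stages share one hardware wave, so each stage body
 * emitted above is wrapped in "if (thread_id < thread count for this
 * stage)".  A plain-C model of the guard (hypothetical helper; lane is
 * what ac_get_thread_id() returns): */
static int lane_runs_stage(unsigned merged_wave_info, unsigned stage_idx,
			   unsigned lane)
{
	/* 8 bits of thread count per stage, as unpacked by ac_build_bfe */
	unsigned count = (merged_wave_info >> (8 * stage_idx)) & 0xff;
	return lane < count;
}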
- else if(shaders[i]->info.stage == MESA_SHADER_VERTEX) - handle_vs_inputs(&ctx, shaders[i]); - else if(shader_count >= 2 && shaders[i]->info.stage == MESA_SHADER_GEOMETRY) - prepare_gs_input_vgprs(&ctx); - - nir_foreach_variable(variable, &shaders[i]->outputs) - scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage); - - ac_nir_translate(&ctx.ac, &ctx.abi, shaders[i]); - - if (shader_count >= 2) { - LLVMBuildBr(ctx.ac.builder, merge_block); - LLVMPositionBuilderAtEnd(ctx.ac.builder, merge_block); - } - - if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) { - unsigned addclip = shaders[i]->info.clip_distance_array_size + - shaders[i]->info.cull_distance_array_size > 4; - shader_info->gs.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16; - shader_info->gs.max_gsvs_emit_size = shader_info->gs.gsvs_vertex_size * - shaders[i]->info.gs.vertices_out; - } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) { - shader_info->tcs.outputs_written = ctx.tess_outputs_written; - shader_info->tcs.patch_outputs_written = ctx.tess_patch_outputs_written; - } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX && ctx.options->key.vs.as_ls) { - shader_info->vs.outputs_written = ctx.tess_outputs_written; - } - } - - LLVMBuildRetVoid(ctx.ac.builder); - - if (options->dump_preoptir) - ac_dump_module(ctx.ac.module); - - ac_llvm_finalize_module(&ctx); - - if (shader_count == 1) - ac_nir_eliminate_const_vs_outputs(&ctx); - - if (dump_shader) { - ctx.shader_info->private_mem_vgprs = - ac_count_scratch_private_memory(ctx.main_function); - } - - return ctx.ac.module; -} - -static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context) -{ - unsigned *retval = (unsigned *)context; - LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di); - char *description = LLVMGetDiagInfoDescription(di); - - if (severity == LLVMDSError) { - *retval = 1; - fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n", - description); - } - - LLVMDisposeMessage(description); -} - -static unsigned ac_llvm_compile(LLVMModuleRef M, - struct ac_shader_binary *binary, - LLVMTargetMachineRef tm) -{ - unsigned retval = 0; - char *err; - LLVMContextRef llvm_ctx; - LLVMMemoryBufferRef out_buffer; - unsigned buffer_size; - const char *buffer_data; - LLVMBool mem_err; - - /* Setup Diagnostic Handler*/ - llvm_ctx = LLVMGetModuleContext(M); - - LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler, - &retval); - - /* Compile IR*/ - mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, - &err, &out_buffer); - - /* Process Errors/Warnings */ - if (mem_err) { - fprintf(stderr, "%s: %s", __FUNCTION__, err); - free(err); - retval = 1; - goto out; - } - - /* Extract Shader Code*/ - buffer_size = LLVMGetBufferSize(out_buffer); - buffer_data = LLVMGetBufferStart(out_buffer); - - ac_elf_read(buffer_data, buffer_size, binary); - - /* Clean up */ - LLVMDisposeMemoryBuffer(out_buffer); - -out: - return retval; -} - -static void ac_compile_llvm_module(LLVMTargetMachineRef tm, - LLVMModuleRef llvm_module, - struct ac_shader_binary *binary, - struct ac_shader_config *config, - struct ac_shader_variant_info *shader_info, - gl_shader_stage stage, - bool dump_shader, bool supports_spill) -{ - if (dump_shader) - ac_dump_module(llvm_module); - - memset(binary, 0, sizeof(*binary)); - int v = ac_llvm_compile(llvm_module, binary, tm); - if (v) { - fprintf(stderr, "compile failed\n"); - } - - if (dump_shader) - fprintf(stderr, "disasm:\n%s\n", binary->disasm_string); - - 
ac_shader_binary_read_config(binary, config, 0, supports_spill); - - LLVMContextRef ctx = LLVMGetModuleContext(llvm_module); - LLVMDisposeModule(llvm_module); - LLVMContextDispose(ctx); - - if (stage == MESA_SHADER_FRAGMENT) { - shader_info->num_input_vgprs = 0; - if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 2; - if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 2; - if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 2; - if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 3; - if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 2; - if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 2; - if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 2; - if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr)) - shader_info->num_input_vgprs += 1; - } - config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs); - - /* +3 for scratch wave offset and VCC */ - config->num_sgprs = MAX2(config->num_sgprs, - shader_info->num_input_sgprs + 3); - - /* Enable 64-bit and 16-bit denormals, because there is no performance - * cost. - * - * If denormals are enabled, all floating-point output modifiers are - * ignored. - * - * Don't enable denormals for 32-bit floats, because: - * - Floating-point output modifiers would be ignored by the hw. - * - Some opcodes don't support denormals, such as v_mad_f32. We would - * have to stop using those. - * - SI & CI would be very slow. 
- */ - config->float_mode |= V_00B028_FP_64_DENORMS; -} - -static void -ac_fill_shader_info(struct ac_shader_variant_info *shader_info, struct nir_shader *nir, const struct ac_nir_compiler_options *options) -{ - switch (nir->info.stage) { - case MESA_SHADER_COMPUTE: - for (int i = 0; i < 3; ++i) - shader_info->cs.block_size[i] = nir->info.cs.local_size[i]; - break; - case MESA_SHADER_FRAGMENT: - shader_info->fs.early_fragment_test = nir->info.fs.early_fragment_tests; - break; - case MESA_SHADER_GEOMETRY: - shader_info->gs.vertices_in = nir->info.gs.vertices_in; - shader_info->gs.vertices_out = nir->info.gs.vertices_out; - shader_info->gs.output_prim = nir->info.gs.output_primitive; - shader_info->gs.invocations = nir->info.gs.invocations; - break; - case MESA_SHADER_TESS_EVAL: - shader_info->tes.primitive_mode = nir->info.tess.primitive_mode; - shader_info->tes.spacing = nir->info.tess.spacing; - shader_info->tes.ccw = nir->info.tess.ccw; - shader_info->tes.point_mode = nir->info.tess.point_mode; - shader_info->tes.as_es = options->key.tes.as_es; - break; - case MESA_SHADER_TESS_CTRL: - shader_info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out; - break; - case MESA_SHADER_VERTEX: - shader_info->vs.as_es = options->key.vs.as_es; - shader_info->vs.as_ls = options->key.vs.as_ls; - /* in LS mode we need at least 1, invocation id needs 2, handled elsewhere */ - if (options->key.vs.as_ls) - shader_info->vs.vgpr_comp_cnt = MAX2(1, shader_info->vs.vgpr_comp_cnt); - break; - default: - break; - } -} - -void ac_compile_nir_shader(LLVMTargetMachineRef tm, - struct ac_shader_binary *binary, - struct ac_shader_config *config, - struct ac_shader_variant_info *shader_info, - struct nir_shader *const *nir, - int nir_count, - const struct ac_nir_compiler_options *options, - bool dump_shader) -{ - - LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, nir_count, shader_info, - options, dump_shader); - - ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir[0]->info.stage, dump_shader, options->supports_spill); - for (int i = 0; i < nir_count; ++i) - ac_fill_shader_info(shader_info, nir[i], options); - - /* Determine the ES type (VS or TES) for the GS on GFX9. 
*/ - if (options->chip_class == GFX9) { - if (nir_count == 2 && - nir[1]->info.stage == MESA_SHADER_GEOMETRY) { - shader_info->gs.es_type = nir[0]->info.stage; - } - } -} - -static void -ac_gs_copy_shader_emit(struct radv_shader_context *ctx) -{ - LLVMValueRef vtx_offset = - LLVMBuildMul(ctx->ac.builder, ctx->abi.vertex_id, - LLVMConstInt(ctx->ac.i32, 4, false), ""); - int idx = 0; - - for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { - int length = 4; - int slot = idx; - int slot_inc = 1; - if (!(ctx->output_mask & (1ull << i))) - continue; - - if (i == VARYING_SLOT_CLIP_DIST0) { - /* unpack clip and cull from a single set of slots */ - length = ctx->num_output_clips + ctx->num_output_culls; - if (length > 4) - slot_inc = 2; - } - - for (unsigned j = 0; j < length; j++) { - LLVMValueRef value, soffset; - - soffset = LLVMConstInt(ctx->ac.i32, - (slot * 4 + j) * - ctx->gs_max_out_vertices * 16 * 4, false); - - value = ac_build_buffer_load(&ctx->ac, ctx->gsvs_ring, - 1, ctx->ac.i32_0, - vtx_offset, soffset, - 0, 1, 1, true, false); - - LLVMBuildStore(ctx->ac.builder, - ac_to_float(&ctx->ac, value), ctx->abi.outputs[radeon_llvm_reg_index_soa(i, j)]); - } - idx += slot_inc; - } - handle_vs_outputs_post(ctx, false, &ctx->shader_info->vs.outinfo); -} - -void ac_create_gs_copy_shader(LLVMTargetMachineRef tm, - struct nir_shader *geom_shader, - struct ac_shader_binary *binary, - struct ac_shader_config *config, - struct ac_shader_variant_info *shader_info, - const struct ac_nir_compiler_options *options, - bool dump_shader) -{ - struct radv_shader_context ctx = {0}; - ctx.context = LLVMContextCreate(); - ctx.options = options; - ctx.shader_info = shader_info; - - ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class, - options->family); - ctx.ac.module = LLVMModuleCreateWithNameInContext("shader", ctx.context); - - ctx.is_gs_copy_shader = true; - LLVMSetTarget(ctx.ac.module, "amdgcn--"); - - enum ac_float_mode float_mode = - options->unsafe_math ? 
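/* The GS copy shader above reads the GSVS ring component-major: for each
 * output slot there are four runs of gs_max_out_vertices entries, one
 * per channel, and the per-vertex part of the address comes from
 * vtx_offset = vertex_id * 4.  A sketch (hypothetical helper) of the
 * constant part of the offset: */
static unsigned gsvs_const_offset_bytes(unsigned slot, unsigned chan,
					unsigned gs_max_out_vertices)
{
	/* mirrors the (slot * 4 + j) * gs_max_out_vertices * 16 * 4 term */
	return (slot * 4 + chan) * gs_max_out_vertices * 16 * 4;
}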
AC_FLOAT_MODE_UNSAFE_FP_MATH : - AC_FLOAT_MODE_DEFAULT; - - ctx.ac.builder = ac_create_builder(ctx.context, float_mode); - ctx.stage = MESA_SHADER_VERTEX; - - create_function(&ctx, MESA_SHADER_VERTEX, false, MESA_SHADER_VERTEX); - - ctx.gs_max_out_vertices = geom_shader->info.gs.vertices_out; - ac_setup_rings(&ctx); - - ctx.num_output_clips = geom_shader->info.clip_distance_array_size; - ctx.num_output_culls = geom_shader->info.cull_distance_array_size; - - nir_foreach_variable(variable, &geom_shader->outputs) { - scan_shader_output_decl(&ctx, variable, geom_shader, MESA_SHADER_VERTEX); - ac_handle_shader_output_decl(&ctx.ac, &ctx.abi, geom_shader, - variable, MESA_SHADER_VERTEX); - } - - ac_gs_copy_shader_emit(&ctx); - - LLVMBuildRetVoid(ctx.ac.builder); - - ac_llvm_finalize_module(&ctx); - - ac_compile_llvm_module(tm, ctx.ac.module, binary, config, shader_info, - MESA_SHADER_VERTEX, - dump_shader, options->supports_spill); -} - void ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class) { diff --git a/src/amd/common/ac_nir_to_llvm.h b/src/amd/common/ac_nir_to_llvm.h index ebe18febbf2..4d48f7ebdd9 100644 --- a/src/amd/common/ac_nir_to_llvm.h +++ b/src/amd/common/ac_nir_to_llvm.h @@ -219,23 +219,6 @@ static inline unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan) return (index * 4) + chan; } -void ac_compile_nir_shader(LLVMTargetMachineRef tm, - struct ac_shader_binary *binary, - struct ac_shader_config *config, - struct ac_shader_variant_info *shader_info, - struct nir_shader *const *nir, - int nir_count, - const struct ac_nir_compiler_options *options, - bool dump_shader); - -void ac_create_gs_copy_shader(LLVMTargetMachineRef tm, - struct nir_shader *geom_shader, - struct ac_shader_binary *binary, - struct ac_shader_config *config, - struct ac_shader_variant_info *shader_info, - const struct ac_nir_compiler_options *options, - bool dump_shader); - void ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class); void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi, diff --git a/src/amd/vulkan/Makefile.sources b/src/amd/vulkan/Makefile.sources index a510d88d965..47ff580e883 100644 --- a/src/amd/vulkan/Makefile.sources +++ b/src/amd/vulkan/Makefile.sources @@ -53,6 +53,7 @@ VULKAN_FILES := \ radv_meta_resolve.c \ radv_meta_resolve_cs.c \ radv_meta_resolve_fs.c \ + radv_nir_to_llvm.c \ radv_pass.c \ radv_pipeline.c \ radv_pipeline_cache.c \ diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build index 61aa8c4fde9..1fd6b755505 100644 --- a/src/amd/vulkan/meson.build +++ b/src/amd/vulkan/meson.build @@ -80,6 +80,7 @@ libradv_files = files( 'radv_meta_resolve.c', 'radv_meta_resolve_cs.c', 'radv_meta_resolve_fs.c', + 'radv_nir_to_llvm.c', 'radv_pass.c', 'radv_pipeline.c', 'radv_pipeline_cache.c', diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c new file mode 100644 index 00000000000..8779c9d2b10 --- /dev/null +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -0,0 +1,3456 @@ +/* + * Copyright © 2016 Red Hat. 
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "radv_private.h"
+#include "nir/nir.h"
+
+#include <llvm-c/Core.h>
+#include <llvm-c/TargetMachine.h>
+#include <llvm-c/Transforms/Scalar.h>
+
+#include "sid.h"
+#include "gfx9d.h"
+#include "ac_binary.h"
+#include "ac_llvm_util.h"
+#include "ac_llvm_build.h"
+#include "ac_shader_abi.h"
+#include "ac_shader_util.h"
+#include "ac_exp_param.h"
+
+#define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1)
+
+struct radv_shader_context {
+	struct ac_llvm_context ac;
+	const struct ac_nir_compiler_options *options;
+	struct ac_shader_variant_info *shader_info;
+	struct ac_shader_abi abi;
+
+	unsigned max_workgroup_size;
+	LLVMContextRef context;
+	LLVMValueRef main_function;
+
+	LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
+	LLVMValueRef ring_offsets;
+
+	LLVMValueRef vertex_buffers;
+	LLVMValueRef rel_auto_id;
+	LLVMValueRef vs_prim_id;
+	LLVMValueRef ls_out_layout;
+	LLVMValueRef es2gs_offset;
+
+	LLVMValueRef tcs_offchip_layout;
+	LLVMValueRef tcs_out_offsets;
+	LLVMValueRef tcs_out_layout;
+	LLVMValueRef tcs_in_layout;
+	LLVMValueRef oc_lds;
+	LLVMValueRef merged_wave_info;
+	LLVMValueRef tess_factor_offset;
+	LLVMValueRef tes_rel_patch_id;
+	LLVMValueRef tes_u;
+	LLVMValueRef tes_v;
+
+	LLVMValueRef gsvs_ring_stride;
+	LLVMValueRef gsvs_num_entries;
+	LLVMValueRef gs2vs_offset;
+	LLVMValueRef gs_wave_id;
+	LLVMValueRef gs_vtx_offset[6];
+
+	LLVMValueRef esgs_ring;
+	LLVMValueRef gsvs_ring;
+	LLVMValueRef hs_ring_tess_offchip;
+	LLVMValueRef hs_ring_tess_factor;
+
+	LLVMValueRef sample_pos_offset;
+	LLVMValueRef persp_sample, persp_center, persp_centroid;
+	LLVMValueRef linear_sample, linear_center, linear_centroid;
+
+	gl_shader_stage stage;
+
+	LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
+
+	uint64_t input_mask;
+	uint64_t output_mask;
+	uint8_t num_output_clips;
+	uint8_t num_output_culls;
+
+	bool is_gs_copy_shader;
+	LLVMValueRef gs_next_vertex;
+	unsigned gs_max_out_vertices;
+
+	unsigned tes_primitive_mode;
+	uint64_t tess_outputs_written;
+	uint64_t tess_patch_outputs_written;
+
+	uint32_t tcs_patch_outputs_read;
+	uint64_t tcs_outputs_read;
+	uint32_t tcs_vertices_per_patch;
+};
+
+enum radeon_llvm_calling_convention {
+	RADEON_LLVM_AMDGPU_VS = 87,
+	RADEON_LLVM_AMDGPU_GS = 88,
+	RADEON_LLVM_AMDGPU_PS = 89,
+	RADEON_LLVM_AMDGPU_CS = 90,
+	RADEON_LLVM_AMDGPU_HS = 93,
+};
+
+static inline struct 
radv_shader_context * +radv_shader_context_from_abi(struct ac_shader_abi *abi) +{ + struct radv_shader_context *ctx = NULL; + return container_of(abi, ctx, abi); +} + +static LLVMValueRef get_rel_patch_id(struct radv_shader_context *ctx) +{ + switch (ctx->stage) { + case MESA_SHADER_TESS_CTRL: + return ac_unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 0, 8); + case MESA_SHADER_TESS_EVAL: + return ctx->tes_rel_patch_id; + break; + default: + unreachable("Illegal stage"); + } +} + +/* Tessellation shaders pass outputs to the next shader using LDS. + * + * LS outputs = TCS inputs + * TCS outputs = TES inputs + * + * The LDS layout is: + * - TCS inputs for patch 0 + * - TCS inputs for patch 1 + * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2) + * - ... + * - TCS outputs for patch 0 = get_tcs_out_patch0_offset + * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset + * - TCS outputs for patch 1 + * - Per-patch TCS outputs for patch 1 + * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2) + * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2) + * - ... + * + * All three shaders VS(LS), TCS, TES share the same LDS space. + */ +static LLVMValueRef +get_tcs_in_patch_stride(struct radv_shader_context *ctx) +{ + if (ctx->stage == MESA_SHADER_VERTEX) + return ac_unpack_param(&ctx->ac, ctx->ls_out_layout, 0, 13); + else if (ctx->stage == MESA_SHADER_TESS_CTRL) + return ac_unpack_param(&ctx->ac, ctx->tcs_in_layout, 0, 13); + else { + assert(0); + return NULL; + } +} + +static LLVMValueRef +get_tcs_out_patch_stride(struct radv_shader_context *ctx) +{ + return ac_unpack_param(&ctx->ac, ctx->tcs_out_layout, 0, 13); +} + +static LLVMValueRef +get_tcs_out_vertex_stride(struct radv_shader_context *ctx) +{ + return ac_unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8); +} + +static LLVMValueRef +get_tcs_out_patch0_offset(struct radv_shader_context *ctx) +{ + return LLVMBuildMul(ctx->ac.builder, + ac_unpack_param(&ctx->ac, ctx->tcs_out_offsets, 0, 16), + LLVMConstInt(ctx->ac.i32, 4, false), ""); +} + +static LLVMValueRef +get_tcs_out_patch0_patch_data_offset(struct radv_shader_context *ctx) +{ + return LLVMBuildMul(ctx->ac.builder, + ac_unpack_param(&ctx->ac, ctx->tcs_out_offsets, 16, 16), + LLVMConstInt(ctx->ac.i32, 4, false), ""); +} + +static LLVMValueRef +get_tcs_in_current_patch_offset(struct radv_shader_context *ctx) +{ + LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx); + LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); + + return LLVMBuildMul(ctx->ac.builder, patch_stride, rel_patch_id, ""); +} + +static LLVMValueRef +get_tcs_out_current_patch_offset(struct radv_shader_context *ctx) +{ + LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx); + LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx); + LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); + + return LLVMBuildAdd(ctx->ac.builder, patch0_offset, + LLVMBuildMul(ctx->ac.builder, patch_stride, + rel_patch_id, ""), + ""); +} + +static LLVMValueRef +get_tcs_out_current_patch_data_offset(struct radv_shader_context *ctx) +{ + LLVMValueRef patch0_patch_data_offset = + get_tcs_out_patch0_patch_data_offset(ctx); + LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx); + LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); + + return LLVMBuildAdd(ctx->ac.builder, patch0_patch_data_offset, + LLVMBuildMul(ctx->ac.builder, patch_stride, + rel_patch_id, ""), + ""); +} + +#define MAX_ARGS 23 +struct arg_info { + 
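/* Worked example for the LDS layout described above, in dword units:
 * TCS inputs for patch r start at r * in_patch_stride, and TCS outputs
 * start at out_patch0 + r * out_patch_stride, where out_patch0 is bits
 * 0..15 of tcs_out_offsets scaled by 4.  As plain C (hypothetical
 * helper mirroring get_tcs_out_current_patch_offset): */
static unsigned tcs_out_current_patch_dw(uint32_t tcs_out_offsets,
					 unsigned out_patch_stride_dw,
					 unsigned rel_patch_id)
{
	unsigned patch0_dw = (tcs_out_offsets & 0xffff) * 4;
	return patch0_dw + rel_patch_id * out_patch_stride_dw;
}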
LLVMTypeRef types[MAX_ARGS]; + LLVMValueRef *assign[MAX_ARGS]; + unsigned array_params_mask; + uint8_t count; + uint8_t sgpr_count; + uint8_t num_sgprs_used; + uint8_t num_vgprs_used; +}; + +enum ac_arg_regfile { + ARG_SGPR, + ARG_VGPR, +}; + +static void +add_arg(struct arg_info *info, enum ac_arg_regfile regfile, LLVMTypeRef type, + LLVMValueRef *param_ptr) +{ + assert(info->count < MAX_ARGS); + + info->assign[info->count] = param_ptr; + info->types[info->count] = type; + info->count++; + + if (regfile == ARG_SGPR) { + info->num_sgprs_used += ac_get_type_size(type) / 4; + info->sgpr_count++; + } else { + assert(regfile == ARG_VGPR); + info->num_vgprs_used += ac_get_type_size(type) / 4; + } +} + +static inline void +add_array_arg(struct arg_info *info, LLVMTypeRef type, LLVMValueRef *param_ptr) +{ + info->array_params_mask |= (1 << info->count); + add_arg(info, ARG_SGPR, type, param_ptr); +} + +static void assign_arguments(LLVMValueRef main_function, + struct arg_info *info) +{ + unsigned i; + for (i = 0; i < info->count; i++) { + if (info->assign[i]) + *info->assign[i] = LLVMGetParam(main_function, i); + } +} + +static LLVMValueRef +create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module, + LLVMBuilderRef builder, LLVMTypeRef *return_types, + unsigned num_return_elems, + struct arg_info *args, + unsigned max_workgroup_size, + bool unsafe_math) +{ + LLVMTypeRef main_function_type, ret_type; + LLVMBasicBlockRef main_function_body; + + if (num_return_elems) + ret_type = LLVMStructTypeInContext(ctx, return_types, + num_return_elems, true); + else + ret_type = LLVMVoidTypeInContext(ctx); + + /* Setup the function */ + main_function_type = + LLVMFunctionType(ret_type, args->types, args->count, 0); + LLVMValueRef main_function = + LLVMAddFunction(module, "main", main_function_type); + main_function_body = + LLVMAppendBasicBlockInContext(ctx, main_function, "main_body"); + LLVMPositionBuilderAtEnd(builder, main_function_body); + + LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS); + for (unsigned i = 0; i < args->sgpr_count; ++i) { + ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG); + + if (args->array_params_mask & (1 << i)) { + LLVMValueRef P = LLVMGetParam(main_function, i); + ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_NOALIAS); + ac_add_attr_dereferenceable(P, UINT64_MAX); + } + } + + if (max_workgroup_size) { + ac_llvm_add_target_dep_function_attr(main_function, + "amdgpu-max-work-group-size", + max_workgroup_size); + } + if (unsafe_math) { + /* These were copied from some LLVM test. 
*/ + LLVMAddTargetDependentFunctionAttr(main_function, + "less-precise-fpmad", + "true"); + LLVMAddTargetDependentFunctionAttr(main_function, + "no-infs-fp-math", + "true"); + LLVMAddTargetDependentFunctionAttr(main_function, + "no-nans-fp-math", + "true"); + LLVMAddTargetDependentFunctionAttr(main_function, + "unsafe-fp-math", + "true"); + LLVMAddTargetDependentFunctionAttr(main_function, + "no-signed-zeros-fp-math", + "true"); + } + return main_function; +} + + +static void +set_loc(struct ac_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs, + uint32_t indirect_offset) +{ + ud_info->sgpr_idx = *sgpr_idx; + ud_info->num_sgprs = num_sgprs; + ud_info->indirect = indirect_offset > 0; + ud_info->indirect_offset = indirect_offset; + *sgpr_idx += num_sgprs; +} + +static void +set_loc_shader(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx, + uint8_t num_sgprs) +{ + struct ac_userdata_info *ud_info = + &ctx->shader_info->user_sgprs_locs.shader_data[idx]; + assert(ud_info); + + set_loc(ud_info, sgpr_idx, num_sgprs, 0); +} + +static void +set_loc_desc(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx, + uint32_t indirect_offset) +{ + struct ac_userdata_info *ud_info = + &ctx->shader_info->user_sgprs_locs.descriptor_sets[idx]; + assert(ud_info); + + set_loc(ud_info, sgpr_idx, 2, indirect_offset); +} + +struct user_sgpr_info { + bool need_ring_offsets; + uint8_t sgpr_count; + bool indirect_all_descriptor_sets; +}; + +static bool needs_view_index_sgpr(struct radv_shader_context *ctx, + gl_shader_stage stage) +{ + switch (stage) { + case MESA_SHADER_VERTEX: + if (ctx->shader_info->info.needs_multiview_view_index || + (!ctx->options->key.vs.as_es && !ctx->options->key.vs.as_ls && ctx->options->key.has_multiview_view_index)) + return true; + break; + case MESA_SHADER_TESS_EVAL: + if (ctx->shader_info->info.needs_multiview_view_index || (!ctx->options->key.tes.as_es && ctx->options->key.has_multiview_view_index)) + return true; + break; + case MESA_SHADER_GEOMETRY: + case MESA_SHADER_TESS_CTRL: + if (ctx->shader_info->info.needs_multiview_view_index) + return true; + break; + default: + break; + } + return false; +} + +static uint8_t +count_vs_user_sgprs(struct radv_shader_context *ctx) +{ + uint8_t count = 0; + + count += ctx->shader_info->info.vs.has_vertex_buffers ? 2 : 0; + count += ctx->shader_info->info.vs.needs_draw_id ? 
3 : 2; + + return count; +} + +static void allocate_user_sgprs(struct radv_shader_context *ctx, + gl_shader_stage stage, + bool has_previous_stage, + gl_shader_stage previous_stage, + bool needs_view_index, + struct user_sgpr_info *user_sgpr_info) +{ + memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info)); + + /* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */ + if (stage == MESA_SHADER_GEOMETRY || + stage == MESA_SHADER_VERTEX || + stage == MESA_SHADER_TESS_CTRL || + stage == MESA_SHADER_TESS_EVAL || + ctx->is_gs_copy_shader) + user_sgpr_info->need_ring_offsets = true; + + if (stage == MESA_SHADER_FRAGMENT && + ctx->shader_info->info.ps.needs_sample_positions) + user_sgpr_info->need_ring_offsets = true; + + /* 2 user sgprs will nearly always be allocated for scratch/rings */ + if (ctx->options->supports_spill || user_sgpr_info->need_ring_offsets) { + user_sgpr_info->sgpr_count += 2; + } + + switch (stage) { + case MESA_SHADER_COMPUTE: + if (ctx->shader_info->info.cs.uses_grid_size) + user_sgpr_info->sgpr_count += 3; + break; + case MESA_SHADER_FRAGMENT: + user_sgpr_info->sgpr_count += ctx->shader_info->info.ps.needs_sample_positions; + break; + case MESA_SHADER_VERTEX: + if (!ctx->is_gs_copy_shader) + user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx); + if (ctx->options->key.vs.as_ls) + user_sgpr_info->sgpr_count++; + break; + case MESA_SHADER_TESS_CTRL: + if (has_previous_stage) { + if (previous_stage == MESA_SHADER_VERTEX) + user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx); + user_sgpr_info->sgpr_count++; + } + user_sgpr_info->sgpr_count += 4; + break; + case MESA_SHADER_TESS_EVAL: + user_sgpr_info->sgpr_count += 1; + break; + case MESA_SHADER_GEOMETRY: + if (has_previous_stage) { + if (previous_stage == MESA_SHADER_VERTEX) { + user_sgpr_info->sgpr_count += count_vs_user_sgprs(ctx); + } else { + user_sgpr_info->sgpr_count++; + } + } + user_sgpr_info->sgpr_count += 2; + break; + default: + break; + } + + if (needs_view_index) + user_sgpr_info->sgpr_count++; + + if (ctx->shader_info->info.loads_push_constants) + user_sgpr_info->sgpr_count += 2; + + uint32_t available_sgprs = ctx->options->chip_class >= GFX9 ? 32 : 16; + uint32_t remaining_sgprs = available_sgprs - user_sgpr_info->sgpr_count; + + if (remaining_sgprs / 2 < util_bitcount(ctx->shader_info->info.desc_set_used_mask)) { + user_sgpr_info->sgpr_count += 2; + user_sgpr_info->indirect_all_descriptor_sets = true; + } else { + user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2; + } +} + +static void +declare_global_input_sgprs(struct radv_shader_context *ctx, + gl_shader_stage stage, + bool has_previous_stage, + gl_shader_stage previous_stage, + const struct user_sgpr_info *user_sgpr_info, + struct arg_info *args, + LLVMValueRef *desc_sets) +{ + LLVMTypeRef type = ac_array_in_const_addr_space(ctx->ac.i8); + unsigned num_sets = ctx->options->layout ? 
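/* The descriptor-set budgeting above in plain arithmetic: each directly
 * bound set costs 2 user SGPRs (one 64-bit pointer).  There are 16 user
 * SGPRs before GFX9 and 32 on GFX9; when the sets no longer fit, a
 * single pointer to an array of sets is passed instead (2 SGPRs total)
 * and each set is fetched with ac_build_load_to_sgpr.  Sketch
 * (hypothetical helper): */
static int need_indirect_descriptor_sets(unsigned available_sgprs,
					 unsigned used_sgprs,
					 unsigned num_desc_sets)
{
	unsigned remaining = available_sgprs - used_sgprs;
	return (remaining / 2) < num_desc_sets;
}
/* e.g. pre-GFX9 with 10 SGPRs already assigned: 3 sets still fit
 * directly, while a pipeline layout with 4 sets falls back to the
 * indirect path. */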
+ ctx->options->layout->num_sets : 0; + unsigned stage_mask = 1 << stage; + + if (has_previous_stage) + stage_mask |= 1 << previous_stage; + + /* 1 for each descriptor set */ + if (!user_sgpr_info->indirect_all_descriptor_sets) { + for (unsigned i = 0; i < num_sets; ++i) { + if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) && + ctx->options->layout->set[i].layout->shader_stages & stage_mask) { + add_array_arg(args, type, + &ctx->descriptor_sets[i]); + } + } + } else { + add_array_arg(args, ac_array_in_const_addr_space(type), desc_sets); + } + + if (ctx->shader_info->info.loads_push_constants) { + /* 1 for push constants and dynamic descriptors */ + add_array_arg(args, type, &ctx->abi.push_constants); + } +} + +static void +declare_vs_specific_input_sgprs(struct radv_shader_context *ctx, + gl_shader_stage stage, + bool has_previous_stage, + gl_shader_stage previous_stage, + struct arg_info *args) +{ + if (!ctx->is_gs_copy_shader && + (stage == MESA_SHADER_VERTEX || + (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) { + if (ctx->shader_info->info.vs.has_vertex_buffers) { + add_arg(args, ARG_SGPR, ac_array_in_const_addr_space(ctx->ac.v4i32), + &ctx->vertex_buffers); + } + add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.base_vertex); + add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.start_instance); + if (ctx->shader_info->info.vs.needs_draw_id) { + add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.draw_id); + } + } +} + +static void +declare_vs_input_vgprs(struct radv_shader_context *ctx, struct arg_info *args) +{ + add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.vertex_id); + if (!ctx->is_gs_copy_shader) { + if (ctx->options->key.vs.as_ls) { + add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->rel_auto_id); + add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id); + } else { + add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id); + add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->vs_prim_id); + } + add_arg(args, ARG_VGPR, ctx->ac.i32, NULL); /* unused */ + } +} + +static void +declare_tes_input_vgprs(struct radv_shader_context *ctx, struct arg_info *args) +{ + add_arg(args, ARG_VGPR, ctx->ac.f32, &ctx->tes_u); + add_arg(args, ARG_VGPR, ctx->ac.f32, &ctx->tes_v); + add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->tes_rel_patch_id); + add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.tes_patch_id); +} + +static void +set_global_input_locs(struct radv_shader_context *ctx, gl_shader_stage stage, + bool has_previous_stage, gl_shader_stage previous_stage, + const struct user_sgpr_info *user_sgpr_info, + LLVMValueRef desc_sets, uint8_t *user_sgpr_idx) +{ + unsigned num_sets = ctx->options->layout ? 
+ ctx->options->layout->num_sets : 0;
+ unsigned stage_mask = 1 << stage;
+
+ if (has_previous_stage)
+ stage_mask |= 1 << previous_stage;
+
+ if (!user_sgpr_info->indirect_all_descriptor_sets) {
+ for (unsigned i = 0; i < num_sets; ++i) {
+ if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) &&
+ ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
+ set_loc_desc(ctx, i, user_sgpr_idx, 0);
+ } else
+ ctx->descriptor_sets[i] = NULL;
+ }
+ } else {
+ set_loc_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS,
+ user_sgpr_idx, 2);
+
+ for (unsigned i = 0; i < num_sets; ++i) {
+ if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) &&
+ ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
+ set_loc_desc(ctx, i, user_sgpr_idx, i * 8);
+ ctx->descriptor_sets[i] =
+ ac_build_load_to_sgpr(&ctx->ac,
+ desc_sets,
+ LLVMConstInt(ctx->ac.i32, i, false));
+
+ } else
+ ctx->descriptor_sets[i] = NULL;
+ }
+ ctx->shader_info->need_indirect_descriptor_sets = true;
+ }
+
+ if (ctx->shader_info->info.loads_push_constants) {
+ set_loc_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2);
+ }
+}
+
+static void
+set_vs_specific_input_locs(struct radv_shader_context *ctx,
+ gl_shader_stage stage, bool has_previous_stage,
+ gl_shader_stage previous_stage,
+ uint8_t *user_sgpr_idx)
+{
+ if (!ctx->is_gs_copy_shader &&
+ (stage == MESA_SHADER_VERTEX ||
+ (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
+ if (ctx->shader_info->info.vs.has_vertex_buffers) {
+ set_loc_shader(ctx, AC_UD_VS_VERTEX_BUFFERS,
+ user_sgpr_idx, 2);
+ }
+
+ unsigned vs_num = 2;
+ if (ctx->shader_info->info.vs.needs_draw_id)
+ vs_num++;
+
+ set_loc_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE,
+ user_sgpr_idx, vs_num);
+ }
+}
+
+static unsigned shader_io_get_unique_index(gl_varying_slot slot)
+{
+ /* handle patch indices separately */
+ if (slot == VARYING_SLOT_TESS_LEVEL_OUTER)
+ return 0;
+ if (slot == VARYING_SLOT_TESS_LEVEL_INNER)
+ return 1;
+ if (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_TESS_MAX)
+ return 2 + (slot - VARYING_SLOT_PATCH0);
+
+ if (slot == VARYING_SLOT_POS)
+ return 0;
+ if (slot == VARYING_SLOT_PSIZ)
+ return 1;
+ if (slot == VARYING_SLOT_CLIP_DIST0)
+ return 2;
+ /* 3 is reserved for clip dist as well */
+ if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)
+ return 4 + (slot - VARYING_SLOT_VAR0);
+ unreachable("illegal slot in get unique index\n");
+}
+
+static void set_llvm_calling_convention(LLVMValueRef func,
+ gl_shader_stage stage)
+{
+ enum radeon_llvm_calling_convention calling_conv;
+
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_TESS_EVAL:
+ calling_conv = RADEON_LLVM_AMDGPU_VS;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ calling_conv = RADEON_LLVM_AMDGPU_GS;
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ calling_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS : RADEON_LLVM_AMDGPU_VS;
+ break;
+ case MESA_SHADER_FRAGMENT:
+ calling_conv = RADEON_LLVM_AMDGPU_PS;
+ break;
+ case MESA_SHADER_COMPUTE:
+ calling_conv = RADEON_LLVM_AMDGPU_CS;
+ break;
+ default:
+ unreachable("Unhandled shader type");
+ }
+
+ LLVMSetFunctionCallConv(func, calling_conv);
+}
+
+static void create_function(struct radv_shader_context *ctx,
+ gl_shader_stage stage,
+ bool has_previous_stage,
+ gl_shader_stage previous_stage)
+{
+ uint8_t user_sgpr_idx;
+ struct user_sgpr_info user_sgpr_info;
+ struct arg_info args = {};
+ LLVMValueRef desc_sets;
+ bool needs_view_index = needs_view_index_sgpr(ctx, stage);
+ allocate_user_sgprs(ctx, stage, has_previous_stage,
+ previous_stage, needs_view_index, &user_sgpr_info);
+
+ if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) {
+ add_arg(&args, ARG_SGPR, ac_array_in_const_addr_space(ctx->ac.v4i32),
+ &ctx->ring_offsets);
+ }
+
+ switch (stage) {
+ case MESA_SHADER_COMPUTE:
+ declare_global_input_sgprs(ctx, stage, has_previous_stage,
+ previous_stage, &user_sgpr_info,
+ &args, &desc_sets);
+
+ if (ctx->shader_info->info.cs.uses_grid_size) {
+ add_arg(&args, ARG_SGPR, ctx->ac.v3i32,
+ &ctx->abi.num_work_groups);
+ }
+
+ for (int i = 0; i < 3; i++) {
+ ctx->abi.workgroup_ids[i] = NULL;
+ if (ctx->shader_info->info.cs.uses_block_id[i]) {
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->abi.workgroup_ids[i]);
+ }
+ }
+
+ if (ctx->shader_info->info.cs.uses_local_invocation_idx)
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->abi.tg_size);
+ add_arg(&args, ARG_VGPR, ctx->ac.v3i32,
+ &ctx->abi.local_invocation_ids);
+ break;
+ case MESA_SHADER_VERTEX:
+ declare_global_input_sgprs(ctx, stage, has_previous_stage,
+ previous_stage, &user_sgpr_info,
+ &args, &desc_sets);
+ declare_vs_specific_input_sgprs(ctx, stage, has_previous_stage,
+ previous_stage, &args);
+
+ if (needs_view_index)
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->abi.view_index);
+ if (ctx->options->key.vs.as_es)
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->es2gs_offset);
+ else if (ctx->options->key.vs.as_ls)
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->ls_out_layout);
+
+ declare_vs_input_vgprs(ctx, &args);
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ if (has_previous_stage) {
+ // First 6 system regs
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds);
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->merged_wave_info);
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->tess_factor_offset);
+
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // scratch offset
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown
+
+ declare_global_input_sgprs(ctx, stage,
+ has_previous_stage,
+ previous_stage,
+ &user_sgpr_info, &args,
+ &desc_sets);
+ declare_vs_specific_input_sgprs(ctx, stage,
+ has_previous_stage,
+ previous_stage, &args);
+
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->ls_out_layout);
+
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->tcs_offchip_layout);
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->tcs_out_offsets);
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->tcs_out_layout);
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->tcs_in_layout);
+ if (needs_view_index)
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->abi.view_index);
+
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->abi.tcs_patch_id);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->abi.tcs_rel_ids);
+
+ declare_vs_input_vgprs(ctx, &args);
+ } else {
+ declare_global_input_sgprs(ctx, stage,
+ 
has_previous_stage, + previous_stage, + &user_sgpr_info, &args, + &desc_sets); + + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->tcs_offchip_layout); + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->tcs_out_offsets); + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->tcs_out_layout); + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->tcs_in_layout); + if (needs_view_index) + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->abi.view_index); + + add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->tess_factor_offset); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->abi.tcs_patch_id); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->abi.tcs_rel_ids); + } + break; + case MESA_SHADER_TESS_EVAL: + declare_global_input_sgprs(ctx, stage, has_previous_stage, + previous_stage, &user_sgpr_info, + &args, &desc_sets); + + add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->tcs_offchip_layout); + if (needs_view_index) + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->abi.view_index); + + if (ctx->options->key.tes.as_es) { + add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); + add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->es2gs_offset); + } else { + add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); + add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); + } + declare_tes_input_vgprs(ctx, &args); + break; + case MESA_SHADER_GEOMETRY: + if (has_previous_stage) { + // First 6 system regs + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->gs2vs_offset); + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->merged_wave_info); + add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds); + + add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // scratch offset + add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown + add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown + + declare_global_input_sgprs(ctx, stage, + has_previous_stage, + previous_stage, + &user_sgpr_info, &args, + &desc_sets); + + if (previous_stage == MESA_SHADER_TESS_EVAL) { + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->tcs_offchip_layout); + } else { + declare_vs_specific_input_sgprs(ctx, stage, + has_previous_stage, + previous_stage, + &args); + } + + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->gsvs_ring_stride); + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->gsvs_num_entries); + if (needs_view_index) + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->abi.view_index); + + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->gs_vtx_offset[0]); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->gs_vtx_offset[2]); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->abi.gs_prim_id); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->abi.gs_invocation_id); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->gs_vtx_offset[4]); + + if (previous_stage == MESA_SHADER_VERTEX) { + declare_vs_input_vgprs(ctx, &args); + } else { + declare_tes_input_vgprs(ctx, &args); + } + } else { + declare_global_input_sgprs(ctx, stage, + has_previous_stage, + previous_stage, + &user_sgpr_info, &args, + &desc_sets); + + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->gsvs_ring_stride); + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->gsvs_num_entries); + if (needs_view_index) + add_arg(&args, ARG_SGPR, ctx->ac.i32, + &ctx->abi.view_index); + + add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->gs2vs_offset); + add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->gs_wave_id); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->gs_vtx_offset[0]); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + &ctx->gs_vtx_offset[1]); + add_arg(&args, ARG_VGPR, ctx->ac.i32, + 
&ctx->abi.gs_prim_id);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->gs_vtx_offset[2]);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->gs_vtx_offset[3]);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->gs_vtx_offset[4]);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->gs_vtx_offset[5]);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32,
+ &ctx->abi.gs_invocation_id);
+ }
+ break;
+ case MESA_SHADER_FRAGMENT:
+ declare_global_input_sgprs(ctx, stage, has_previous_stage,
+ previous_stage, &user_sgpr_info,
+ &args, &desc_sets);
+
+ if (ctx->shader_info->info.ps.needs_sample_positions)
+ add_arg(&args, ARG_SGPR, ctx->ac.i32,
+ &ctx->sample_pos_offset);
+
+ add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->abi.prim_mask);
+ add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_sample);
+ add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_center);
+ add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_centroid);
+ add_arg(&args, ARG_VGPR, ctx->ac.v3i32, NULL); /* persp pull model */
+ add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_sample);
+ add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_center);
+ add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_centroid);
+ add_arg(&args, ARG_VGPR, ctx->ac.f32, NULL); /* line stipple tex */
+ add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[0]);
+ add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[1]);
+ add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[2]);
+ add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[3]);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.front_face);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.ancillary);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.sample_coverage);
+ add_arg(&args, ARG_VGPR, ctx->ac.i32, NULL); /* fixed pt */
+ break;
+ default:
+ unreachable("Shader stage not implemented");
+ }
+
+ ctx->main_function = create_llvm_function(
+ ctx->context, ctx->ac.module, ctx->ac.builder, NULL, 0, &args,
+ ctx->max_workgroup_size,
+ ctx->options->unsafe_math);
+ set_llvm_calling_convention(ctx->main_function, stage);
+
+ ctx->shader_info->num_input_vgprs = 0;
+ ctx->shader_info->num_input_sgprs = ctx->options->supports_spill ? 2 : 0;
+
+ ctx->shader_info->num_input_sgprs += args.num_sgprs_used;
+
+ if (ctx->stage != MESA_SHADER_FRAGMENT)
+ ctx->shader_info->num_input_vgprs = args.num_vgprs_used;
+
+ assign_arguments(ctx->main_function, &args);
+
+ user_sgpr_idx = 0;
+
+ if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets) {
+ set_loc_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS,
+ &user_sgpr_idx, 2);
+ if (ctx->options->supports_spill) {
+ ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
+ LLVMPointerType(ctx->ac.i8, AC_CONST_ADDR_SPACE),
+ NULL, 0, AC_FUNC_ATTR_READNONE);
+ ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets,
+ ac_array_in_const_addr_space(ctx->ac.v4i32), "");
+ }
+ }
+
+ /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
+ * the rw_buffers at s0/s1). With user SGPR0 = s8, let's restart the count from 0 */
+ if (has_previous_stage)
+ user_sgpr_idx = 0;
+
+ set_global_input_locs(ctx, stage, has_previous_stage, previous_stage,
+ &user_sgpr_info, desc_sets, &user_sgpr_idx);
+
+ switch (stage) {
+ case MESA_SHADER_COMPUTE:
+ if (ctx->shader_info->info.cs.uses_grid_size) {
+ set_loc_shader(ctx, AC_UD_CS_GRID_SIZE,
+ &user_sgpr_idx, 3);
+ }
+ break;
+ case MESA_SHADER_VERTEX:
+ set_vs_specific_input_locs(ctx, stage, has_previous_stage,
+ previous_stage, &user_sgpr_idx);
+ if (ctx->abi.view_index)
+ set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
+ if (ctx->options->key.vs.as_ls) {
+ set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT,
+ &user_sgpr_idx, 1);
+ }
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ set_vs_specific_input_locs(ctx, stage, has_previous_stage,
+ previous_stage, &user_sgpr_idx);
+ if (has_previous_stage)
+ set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT,
+ &user_sgpr_idx, 1);
+ set_loc_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, &user_sgpr_idx, 4);
+ if (ctx->abi.view_index)
+ set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1);
+ if (ctx->abi.view_index)
+ set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ if (has_previous_stage) {
+ if (previous_stage == MESA_SHADER_VERTEX)
+ set_vs_specific_input_locs(ctx, stage,
+ has_previous_stage,
+ previous_stage,
+ &user_sgpr_idx);
+ else
+ set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT,
+ &user_sgpr_idx, 1);
+ }
+ set_loc_shader(ctx, AC_UD_GS_VS_RING_STRIDE_ENTRIES,
+ &user_sgpr_idx, 2);
+ if (ctx->abi.view_index)
+ set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
+ break;
+ case MESA_SHADER_FRAGMENT:
+ if (ctx->shader_info->info.ps.needs_sample_positions) {
+ set_loc_shader(ctx, AC_UD_PS_SAMPLE_POS_OFFSET,
+ &user_sgpr_idx, 1);
+ }
+ break;
+ default:
+ unreachable("Shader stage not implemented");
+ }
+
+ if (stage == MESA_SHADER_TESS_CTRL ||
+ (stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_ls) ||
+ /* GFX9 has the ESGS ring buffer in LDS. 
*/ + (stage == MESA_SHADER_GEOMETRY && has_previous_stage)) { + ac_declare_lds_as_pointer(&ctx->ac); + } + + ctx->shader_info->num_user_sgprs = user_sgpr_idx; +} + + +static LLVMValueRef +radv_load_resource(struct ac_shader_abi *abi, LLVMValueRef index, + unsigned desc_set, unsigned binding) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set]; + struct radv_pipeline_layout *pipeline_layout = ctx->options->layout; + struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout; + unsigned base_offset = layout->binding[binding].offset; + LLVMValueRef offset, stride; + + if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC || + layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) { + unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start + + layout->binding[binding].dynamic_offset_offset; + desc_ptr = ctx->abi.push_constants; + base_offset = pipeline_layout->push_constant_size + 16 * idx; + stride = LLVMConstInt(ctx->ac.i32, 16, false); + } else + stride = LLVMConstInt(ctx->ac.i32, layout->binding[binding].size, false); + + offset = LLVMConstInt(ctx->ac.i32, base_offset, false); + index = LLVMBuildMul(ctx->ac.builder, index, stride, ""); + offset = LLVMBuildAdd(ctx->ac.builder, offset, index, ""); + + desc_ptr = ac_build_gep0(&ctx->ac, desc_ptr, offset); + desc_ptr = ac_cast_ptr(&ctx->ac, desc_ptr, ctx->ac.v4i32); + LLVMSetMetadata(desc_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md); + + return desc_ptr; +} + + +/* The offchip buffer layout for TCS->TES is + * + * - attribute 0 of patch 0 vertex 0 + * - attribute 0 of patch 0 vertex 1 + * - attribute 0 of patch 0 vertex 2 + * ... + * - attribute 0 of patch 1 vertex 0 + * - attribute 0 of patch 1 vertex 1 + * ... + * - attribute 1 of patch 0 vertex 0 + * - attribute 1 of patch 0 vertex 1 + * ... + * - per patch attribute 0 of patch 0 + * - per patch attribute 0 of patch 1 + * ... + * + * Note that every attribute has 4 components. 
+ */ +static LLVMValueRef get_tcs_tes_buffer_address(struct radv_shader_context *ctx, + LLVMValueRef vertex_index, + LLVMValueRef param_index) +{ + LLVMValueRef base_addr, vertices_per_patch, num_patches; + LLVMValueRef param_stride, constant16; + LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); + + vertices_per_patch = LLVMConstInt(ctx->ac.i32, ctx->tcs_vertices_per_patch, false); + num_patches = ac_unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 0, 9); + + constant16 = LLVMConstInt(ctx->ac.i32, 16, false); + if (vertex_index) { + base_addr = LLVMBuildMul(ctx->ac.builder, rel_patch_id, + vertices_per_patch, ""); + + base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, + vertex_index, ""); + + param_stride = LLVMBuildMul(ctx->ac.builder, vertices_per_patch, + num_patches, ""); + } else { + base_addr = rel_patch_id; + param_stride = num_patches; + } + + base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, + LLVMBuildMul(ctx->ac.builder, param_index, + param_stride, ""), ""); + + base_addr = LLVMBuildMul(ctx->ac.builder, base_addr, constant16, ""); + + if (!vertex_index) { + LLVMValueRef patch_data_offset = + ac_unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 16, 16); + + base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, + patch_data_offset, ""); + } + return base_addr; +} + +static LLVMValueRef get_tcs_tes_buffer_address_params(struct radv_shader_context *ctx, + unsigned param, + unsigned const_index, + bool is_compact, + LLVMValueRef vertex_index, + LLVMValueRef indir_index) +{ + LLVMValueRef param_index; + + if (indir_index) + param_index = LLVMBuildAdd(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, param, false), + indir_index, ""); + else { + if (const_index && !is_compact) + param += const_index; + param_index = LLVMConstInt(ctx->ac.i32, param, false); + } + return get_tcs_tes_buffer_address(ctx, vertex_index, param_index); +} + +static void +mark_tess_output(struct radv_shader_context *ctx, + bool is_patch, uint32_t param) + +{ + if (is_patch) { + ctx->tess_patch_outputs_written |= (1ull << param); + } else + ctx->tess_outputs_written |= (1ull << param); +} + +static LLVMValueRef +get_dw_address(struct radv_shader_context *ctx, + LLVMValueRef dw_addr, + unsigned param, + unsigned const_index, + bool compact_const_index, + LLVMValueRef vertex_index, + LLVMValueRef stride, + LLVMValueRef indir_index) + +{ + + if (vertex_index) { + dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, + LLVMBuildMul(ctx->ac.builder, + vertex_index, + stride, ""), ""); + } + + if (indir_index) + dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, + LLVMBuildMul(ctx->ac.builder, indir_index, + LLVMConstInt(ctx->ac.i32, 4, false), ""), ""); + else if (const_index && !compact_const_index) + dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, + LLVMConstInt(ctx->ac.i32, const_index, false), ""); + + dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, + LLVMConstInt(ctx->ac.i32, param * 4, false), ""); + + if (const_index && compact_const_index) + dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, + LLVMConstInt(ctx->ac.i32, const_index, false), ""); + return dw_addr; +} + +static LLVMValueRef +load_tcs_varyings(struct ac_shader_abi *abi, + LLVMTypeRef type, + LLVMValueRef vertex_index, + LLVMValueRef indir_index, + unsigned const_index, + unsigned location, + unsigned driver_location, + unsigned component, + unsigned num_components, + bool is_patch, + bool is_compact, + bool load_input) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + LLVMValueRef dw_addr, stride; + LLVMValueRef value[4], result; 
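+ /* Addressing sketch (summary of the helpers used here, not new
+ * behavior): TCS I/O lives in LDS as dwords, and get_dw_address()
+ * computes roughly
+ * patch_offset + vertex_index * stride + param * 4 + const_index,
+ * with indirect indices scaled by 4 dwords per vec4 slot. */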
+ unsigned param = shader_io_get_unique_index(location); + + if (load_input) { + stride = ac_unpack_param(&ctx->ac, ctx->tcs_in_layout, 13, 8); + dw_addr = get_tcs_in_current_patch_offset(ctx); + } else { + if (!is_patch) { + stride = get_tcs_out_vertex_stride(ctx); + dw_addr = get_tcs_out_current_patch_offset(ctx); + } else { + dw_addr = get_tcs_out_current_patch_data_offset(ctx); + stride = NULL; + } + } + + dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride, + indir_index); + + for (unsigned i = 0; i < num_components + component; i++) { + value[i] = ac_lds_load(&ctx->ac, dw_addr); + dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, + ctx->ac.i32_1, ""); + } + result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component); + return result; +} + +static void +store_tcs_output(struct ac_shader_abi *abi, + LLVMValueRef vertex_index, + LLVMValueRef param_index, + unsigned const_index, + unsigned location, + unsigned driver_location, + LLVMValueRef src, + unsigned component, + bool is_patch, + bool is_compact, + unsigned writemask) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + LLVMValueRef dw_addr; + LLVMValueRef stride = NULL; + LLVMValueRef buf_addr = NULL; + unsigned param; + bool store_lds = true; + + if (is_patch) { + if (!(ctx->tcs_patch_outputs_read & (1U << (location - VARYING_SLOT_PATCH0)))) + store_lds = false; + } else { + if (!(ctx->tcs_outputs_read & (1ULL << location))) + store_lds = false; + } + + param = shader_io_get_unique_index(location); + if (location == VARYING_SLOT_CLIP_DIST0 && + is_compact && const_index > 3) { + const_index -= 3; + param++; + } + + if (!is_patch) { + stride = get_tcs_out_vertex_stride(ctx); + dw_addr = get_tcs_out_current_patch_offset(ctx); + } else { + dw_addr = get_tcs_out_current_patch_data_offset(ctx); + } + + mark_tess_output(ctx, is_patch, param); + + dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride, + param_index); + buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, is_compact, + vertex_index, param_index); + + bool is_tess_factor = false; + if (location == VARYING_SLOT_TESS_LEVEL_INNER || + location == VARYING_SLOT_TESS_LEVEL_OUTER) + is_tess_factor = true; + + unsigned base = is_compact ? 
const_index : 0; + for (unsigned chan = 0; chan < 8; chan++) { + if (!(writemask & (1 << chan))) + continue; + LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component); + + if (store_lds || is_tess_factor) { + LLVMValueRef dw_addr_chan = + LLVMBuildAdd(ctx->ac.builder, dw_addr, + LLVMConstInt(ctx->ac.i32, chan, false), ""); + ac_lds_store(&ctx->ac, dw_addr_chan, value); + } + + if (!is_tess_factor && writemask != 0xF) + ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1, + buf_addr, ctx->oc_lds, + 4 * (base + chan), 1, 0, true, false); + } + + if (writemask == 0xF) { + ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, src, 4, + buf_addr, ctx->oc_lds, + (base * 4), 1, 0, true, false); + } +} + +static LLVMValueRef +load_tes_input(struct ac_shader_abi *abi, + LLVMTypeRef type, + LLVMValueRef vertex_index, + LLVMValueRef param_index, + unsigned const_index, + unsigned location, + unsigned driver_location, + unsigned component, + unsigned num_components, + bool is_patch, + bool is_compact, + bool load_input) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + LLVMValueRef buf_addr; + LLVMValueRef result; + unsigned param = shader_io_get_unique_index(location); + + if (location == VARYING_SLOT_CLIP_DIST0 && is_compact && const_index > 3) { + const_index -= 3; + param++; + } + + buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, + is_compact, vertex_index, param_index); + + LLVMValueRef comp_offset = LLVMConstInt(ctx->ac.i32, component * 4, false); + buf_addr = LLVMBuildAdd(ctx->ac.builder, buf_addr, comp_offset, ""); + + result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, num_components, NULL, + buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false); + result = ac_trim_vector(&ctx->ac, result, num_components); + return result; +} + +static LLVMValueRef +load_gs_input(struct ac_shader_abi *abi, + unsigned location, + unsigned driver_location, + unsigned component, + unsigned num_components, + unsigned vertex_index, + unsigned const_index, + LLVMTypeRef type) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + LLVMValueRef vtx_offset; + unsigned param, vtx_offset_param; + LLVMValueRef value[4], result; + + vtx_offset_param = vertex_index; + assert(vtx_offset_param < 6); + vtx_offset = LLVMBuildMul(ctx->ac.builder, ctx->gs_vtx_offset[vtx_offset_param], + LLVMConstInt(ctx->ac.i32, 4, false), ""); + + param = shader_io_get_unique_index(location); + + for (unsigned i = component; i < num_components + component; i++) { + if (ctx->ac.chip_class >= GFX9) { + LLVMValueRef dw_addr = ctx->gs_vtx_offset[vtx_offset_param]; + dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, + LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), ""); + value[i] = ac_lds_load(&ctx->ac, dw_addr); + } else { + LLVMValueRef soffset = + LLVMConstInt(ctx->ac.i32, + (param * 4 + i + const_index) * 256, + false); + + value[i] = ac_build_buffer_load(&ctx->ac, + ctx->esgs_ring, 1, + ctx->ac.i32_0, + vtx_offset, soffset, + 0, 1, 0, true, false); + + value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], + type, ""); + } + } + result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component); + result = ac_to_integer(&ctx->ac, result); + return result; +} + + +static void radv_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + ac_build_kill_if_false(&ctx->ac, visible); +} + 
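+/* Interpolant overview (sketch of the mapping implemented below):
+ * lookup_interp_param() maps GLSL interpolation qualifiers to the i/j
+ * VGPR pairs declared in create_function():
+ *   smooth/none   -> persp_{sample,center,centroid}
+ *   noperspective -> linear_{sample,center,centroid}
+ *   flat          -> NULL (interp_fs_input() then falls back to
+ *                    fs.interp.mov and relies on the FLAT_SHADE state)
+ */
+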
+static LLVMValueRef lookup_interp_param(struct ac_shader_abi *abi, + enum glsl_interp_mode interp, unsigned location) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + + switch (interp) { + case INTERP_MODE_FLAT: + default: + return NULL; + case INTERP_MODE_SMOOTH: + case INTERP_MODE_NONE: + if (location == INTERP_CENTER) + return ctx->persp_center; + else if (location == INTERP_CENTROID) + return ctx->persp_centroid; + else if (location == INTERP_SAMPLE) + return ctx->persp_sample; + break; + case INTERP_MODE_NOPERSPECTIVE: + if (location == INTERP_CENTER) + return ctx->linear_center; + else if (location == INTERP_CENTROID) + return ctx->linear_centroid; + else if (location == INTERP_SAMPLE) + return ctx->linear_sample; + break; + } + return NULL; +} + +static LLVMValueRef load_sample_position(struct ac_shader_abi *abi, + LLVMValueRef sample_id) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + + LLVMValueRef result; + LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_PS_SAMPLE_POSITIONS, false)); + + ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, + ac_array_in_const_addr_space(ctx->ac.v2f32), ""); + + sample_id = LLVMBuildAdd(ctx->ac.builder, sample_id, ctx->sample_pos_offset, ""); + result = ac_build_load_invariant(&ctx->ac, ptr, sample_id); + + return result; +} + + +static LLVMValueRef load_sample_mask_in(struct ac_shader_abi *abi) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + uint8_t log2_ps_iter_samples = ctx->shader_info->info.ps.force_persample ? + ctx->options->key.fs.log2_num_samples : + ctx->options->key.fs.log2_ps_iter_samples; + + /* The bit pattern matches that used by fixed function fragment + * processing. */ + static const uint16_t ps_iter_masks[] = { + 0xffff, /* not used */ + 0x5555, + 0x1111, + 0x0101, + 0x0001, + }; + assert(log2_ps_iter_samples < ARRAY_SIZE(ps_iter_masks)); + + uint32_t ps_iter_mask = ps_iter_masks[log2_ps_iter_samples]; + + LLVMValueRef result, sample_id; + sample_id = ac_unpack_param(&ctx->ac, abi->ancillary, 8, 4); + sample_id = LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, ps_iter_mask, false), sample_id, ""); + result = LLVMBuildAnd(ctx->ac.builder, sample_id, abi->sample_coverage, ""); + return result; +} + + +static void +visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs) +{ + LLVMValueRef gs_next_vertex; + LLVMValueRef can_emit; + int idx; + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + + assert(stream == 0); + + /* Write vertex attribute values to GSVS ring */ + gs_next_vertex = LLVMBuildLoad(ctx->ac.builder, + ctx->gs_next_vertex, + ""); + + /* If this thread has already emitted the declared maximum number of + * vertices, kill it: excessive vertex emissions are not supposed to + * have any effect, and GS threads have no externally observable + * effects other than emitting vertices. 
+ */ + can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, gs_next_vertex, + LLVMConstInt(ctx->ac.i32, ctx->gs_max_out_vertices, false), ""); + ac_build_kill_if_false(&ctx->ac, can_emit); + + /* loop num outputs */ + idx = 0; + for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { + LLVMValueRef *out_ptr = &addrs[i * 4]; + int length = 4; + int slot = idx; + int slot_inc = 1; + + if (!(ctx->output_mask & (1ull << i))) + continue; + + if (i == VARYING_SLOT_CLIP_DIST0) { + /* pack clip and cull into a single set of slots */ + length = ctx->num_output_clips + ctx->num_output_culls; + if (length > 4) + slot_inc = 2; + } + for (unsigned j = 0; j < length; j++) { + LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, + out_ptr[j], ""); + LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, (slot * 4 + j) * ctx->gs_max_out_vertices, false); + voffset = LLVMBuildAdd(ctx->ac.builder, voffset, gs_next_vertex, ""); + voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), ""); + + out_val = LLVMBuildBitCast(ctx->ac.builder, out_val, ctx->ac.i32, ""); + + ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring, + out_val, 1, + voffset, ctx->gs2vs_offset, 0, + 1, 1, true, true); + } + idx += slot_inc; + } + + gs_next_vertex = LLVMBuildAdd(ctx->ac.builder, gs_next_vertex, + ctx->ac.i32_1, ""); + LLVMBuildStore(ctx->ac.builder, gs_next_vertex, ctx->gs_next_vertex); + + ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (0 << 8), ctx->gs_wave_id); +} + +static void +visit_end_primitive(struct ac_shader_abi *abi, unsigned stream) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8), ctx->gs_wave_id); +} + +static LLVMValueRef +load_tess_coord(struct ac_shader_abi *abi) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + + LLVMValueRef coord[4] = { + ctx->tes_u, + ctx->tes_v, + ctx->ac.f32_0, + ctx->ac.f32_0, + }; + + if (ctx->tes_primitive_mode == GL_TRIANGLES) + coord[2] = LLVMBuildFSub(ctx->ac.builder, ctx->ac.f32_1, + LLVMBuildFAdd(ctx->ac.builder, coord[0], coord[1], ""), ""); + + return ac_build_gather_values(&ctx->ac, coord, 3); +} + +static LLVMValueRef +load_patch_vertices_in(struct ac_shader_abi *abi) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + return LLVMConstInt(ctx->ac.i32, ctx->options->key.tcs.input_vertices, false); +} + + +static LLVMValueRef radv_load_base_vertex(struct ac_shader_abi *abi) +{ + return abi->base_vertex; +} + +static LLVMValueRef radv_load_ssbo(struct ac_shader_abi *abi, + LLVMValueRef buffer_ptr, bool write) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + LLVMValueRef result; + + LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md); + + result = LLVMBuildLoad(ctx->ac.builder, buffer_ptr, ""); + LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md); + + return result; +} + +static LLVMValueRef radv_load_ubo(struct ac_shader_abi *abi, LLVMValueRef buffer_ptr) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + LLVMValueRef result; + + LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md); + + result = LLVMBuildLoad(ctx->ac.builder, buffer_ptr, ""); + LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md); + + return result; +} + +static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi, + unsigned descriptor_set, + unsigned base_index, + 
unsigned constant_index, + LLVMValueRef index, + enum ac_descriptor_type desc_type, + bool image, bool write) +{ + struct radv_shader_context *ctx = radv_shader_context_from_abi(abi); + LLVMValueRef list = ctx->descriptor_sets[descriptor_set]; + struct radv_descriptor_set_layout *layout = ctx->options->layout->set[descriptor_set].layout; + struct radv_descriptor_set_binding_layout *binding = layout->binding + base_index; + unsigned offset = binding->offset; + unsigned stride = binding->size; + unsigned type_size; + LLVMBuilderRef builder = ctx->ac.builder; + LLVMTypeRef type; + + assert(base_index < layout->binding_count); + + switch (desc_type) { + case AC_DESC_IMAGE: + type = ctx->ac.v8i32; + type_size = 32; + break; + case AC_DESC_FMASK: + type = ctx->ac.v8i32; + offset += 32; + type_size = 32; + break; + case AC_DESC_SAMPLER: + type = ctx->ac.v4i32; + if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + offset += 64; + + type_size = 16; + break; + case AC_DESC_BUFFER: + type = ctx->ac.v4i32; + type_size = 16; + break; + default: + unreachable("invalid desc_type\n"); + } + + offset += constant_index * stride; + + if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset && + (!index || binding->immutable_samplers_equal)) { + if (binding->immutable_samplers_equal) + constant_index = 0; + + const uint32_t *samplers = radv_immutable_samplers(layout, binding); + + LLVMValueRef constants[] = { + LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 0], 0), + LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 1], 0), + LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 2], 0), + LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 3], 0), + }; + return ac_build_gather_values(&ctx->ac, constants, 4); + } + + assert(stride % type_size == 0); + + if (!index) + index = ctx->ac.i32_0; + + index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), ""); + + list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, offset, 0)); + list = LLVMBuildPointerCast(builder, list, ac_array_in_const_addr_space(type), ""); + + return ac_build_load_to_sgpr(&ctx->ac, list, index); +} + + +static void +handle_vs_input_decl(struct radv_shader_context *ctx, + struct nir_variable *variable) +{ + LLVMValueRef t_list_ptr = ctx->vertex_buffers; + LLVMValueRef t_offset; + LLVMValueRef t_list; + LLVMValueRef input; + LLVMValueRef buffer_index; + int index = variable->data.location - VERT_ATTRIB_GENERIC0; + int idx = variable->data.location; + unsigned attrib_count = glsl_count_attribute_slots(variable->type, true); + uint8_t input_usage_mask = + ctx->shader_info->info.vs.input_usage_mask[variable->data.location]; + unsigned num_channels = util_last_bit(input_usage_mask); + + variable->data.driver_location = idx * 4; + + for (unsigned i = 0; i < attrib_count; ++i, ++idx) { + if (ctx->options->key.vs.instance_rate_inputs & (1u << (index + i))) { + buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.instance_id, + ctx->abi.start_instance, ""); + if (ctx->options->key.vs.as_ls) { + ctx->shader_info->vs.vgpr_comp_cnt = + MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt); + } else { + ctx->shader_info->vs.vgpr_comp_cnt = + MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt); + } + } else + buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id, + ctx->abi.base_vertex, ""); + t_offset = LLVMConstInt(ctx->ac.i32, index + i, false); + + t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset); + + input = ac_build_buffer_load_format(&ctx->ac, t_list, + 
buffer_index, + ctx->ac.i32_0, + num_channels, false, true); + + input = ac_build_expand_to_vec4(&ctx->ac, input, num_channels); + + for (unsigned chan = 0; chan < 4; chan++) { + LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); + ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] = + ac_to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, + input, llvm_chan, "")); + } + } +} + +static void interp_fs_input(struct radv_shader_context *ctx, + unsigned attr, + LLVMValueRef interp_param, + LLVMValueRef prim_mask, + LLVMValueRef result[4]) +{ + LLVMValueRef attr_number; + unsigned chan; + LLVMValueRef i, j; + bool interp = interp_param != NULL; + + attr_number = LLVMConstInt(ctx->ac.i32, attr, false); + + /* fs.constant returns the param from the middle vertex, so it's not + * really useful for flat shading. It's meant to be used for custom + * interpolation (but the intrinsic can't fetch from the other two + * vertices). + * + * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state + * to do the right thing. The only reason we use fs.constant is that + * fs.interp cannot be used on integers, because they can be equal + * to NaN. + */ + if (interp) { + interp_param = LLVMBuildBitCast(ctx->ac.builder, interp_param, + ctx->ac.v2f32, ""); + + i = LLVMBuildExtractElement(ctx->ac.builder, interp_param, + ctx->ac.i32_0, ""); + j = LLVMBuildExtractElement(ctx->ac.builder, interp_param, + ctx->ac.i32_1, ""); + } + + for (chan = 0; chan < 4; chan++) { + LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false); + + if (interp) { + result[chan] = ac_build_fs_interp(&ctx->ac, + llvm_chan, + attr_number, + prim_mask, i, j); + } else { + result[chan] = ac_build_fs_interp_mov(&ctx->ac, + LLVMConstInt(ctx->ac.i32, 2, false), + llvm_chan, + attr_number, + prim_mask); + } + } +} + +static void +handle_fs_input_decl(struct radv_shader_context *ctx, + struct nir_variable *variable) +{ + int idx = variable->data.location; + unsigned attrib_count = glsl_count_attribute_slots(variable->type, false); + LLVMValueRef interp; + + variable->data.driver_location = idx * 4; + ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location; + + if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) { + unsigned interp_type; + if (variable->data.sample) + interp_type = INTERP_SAMPLE; + else if (variable->data.centroid) + interp_type = INTERP_CENTROID; + else + interp_type = INTERP_CENTER; + + interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type); + } else + interp = NULL; + + for (unsigned i = 0; i < attrib_count; ++i) + ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp; + +} + +static void +handle_vs_inputs(struct radv_shader_context *ctx, + struct nir_shader *nir) { + nir_foreach_variable(variable, &nir->inputs) + handle_vs_input_decl(ctx, variable); +} + +static void +prepare_interp_optimize(struct radv_shader_context *ctx, + struct nir_shader *nir) +{ + if (!ctx->options->key.fs.multisample) + return; + + bool uses_center = false; + bool uses_centroid = false; + nir_foreach_variable(variable, &nir->inputs) { + if (glsl_get_base_type(glsl_without_array(variable->type)) != GLSL_TYPE_FLOAT || + variable->data.sample) + continue; + + if (variable->data.centroid) + uses_centroid = true; + else + uses_center = true; + } + + if (uses_center && uses_centroid) { + LLVMValueRef sel = LLVMBuildICmp(ctx->ac.builder, LLVMIntSLT, ctx->abi.prim_mask, ctx->ac.i32_0, ""); + ctx->persp_centroid = 
LLVMBuildSelect(ctx->ac.builder, sel, ctx->persp_center, ctx->persp_centroid, "");
+ ctx->linear_centroid = LLVMBuildSelect(ctx->ac.builder, sel, ctx->linear_center, ctx->linear_centroid, "");
+ }
+}
+
+static void
+handle_fs_inputs(struct radv_shader_context *ctx,
+ struct nir_shader *nir)
+{
+ prepare_interp_optimize(ctx, nir);
+
+ nir_foreach_variable(variable, &nir->inputs)
+ handle_fs_input_decl(ctx, variable);
+
+ unsigned index = 0;
+
+ if (ctx->shader_info->info.ps.uses_input_attachments ||
+ ctx->shader_info->info.needs_multiview_view_index)
+ ctx->input_mask |= 1ull << VARYING_SLOT_LAYER;
+
+ for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
+ LLVMValueRef interp_param;
+ LLVMValueRef *inputs = ctx->inputs + radeon_llvm_reg_index_soa(i, 0);
+
+ if (!(ctx->input_mask & (1ull << i)))
+ continue;
+
+ if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC ||
+ i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) {
+ interp_param = *inputs;
+ interp_fs_input(ctx, index, interp_param, ctx->abi.prim_mask,
+ inputs);
+
+ if (!interp_param)
+ ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
+ ++index;
+ } else if (i == VARYING_SLOT_POS) {
+ for(int i = 0; i < 3; ++i)
+ inputs[i] = ctx->abi.frag_pos[i];
+
+ inputs[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
+ ctx->abi.frag_pos[3]);
+ }
+ }
+ ctx->shader_info->fs.num_interp = index;
+ ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0;
+
+ if (ctx->shader_info->info.needs_multiview_view_index)
+ ctx->abi.view_index = ctx->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
+}
+
+static void
+scan_shader_output_decl(struct radv_shader_context *ctx,
+ struct nir_variable *variable,
+ struct nir_shader *shader,
+ gl_shader_stage stage)
+{
+ int idx = variable->data.location + variable->data.index;
+ unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
+ uint64_t mask_attribs;
+
+ variable->data.driver_location = idx * 4;
+
+ /* tess ctrl has its own load/store paths for outputs */
+ if (stage == MESA_SHADER_TESS_CTRL)
+ return;
+
+ mask_attribs = ((1ull << attrib_count) - 1) << idx;
+ if (stage == MESA_SHADER_VERTEX ||
+ stage == MESA_SHADER_TESS_EVAL ||
+ stage == MESA_SHADER_GEOMETRY) {
+ if (idx == VARYING_SLOT_CLIP_DIST0) {
+ int length = shader->info.clip_distance_array_size +
+ shader->info.cull_distance_array_size;
+ if (stage == MESA_SHADER_VERTEX) {
+ ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1;
+ ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1;
+ }
+ if (stage == MESA_SHADER_TESS_EVAL) {
+ ctx->shader_info->tes.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1;
+ ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1;
+ }
+
+ if (length > 4)
+ attrib_count = 2;
+ else
+ attrib_count = 1;
+ mask_attribs = 1ull << idx;
+ }
+ }
+
+ ctx->output_mask |= mask_attribs;
+}
+
+
+/* Initialize arguments for the shader export intrinsic */
+static void
+si_llvm_init_export_args(struct radv_shader_context *ctx,
+ LLVMValueRef *values,
+ unsigned enabled_channels,
+ unsigned target,
+ struct ac_export_args *args)
+{
+ /* Specify the channels that are enabled. 
*/ + args->enabled_channels = enabled_channels; + + /* Specify whether the EXEC mask represents the valid mask */ + args->valid_mask = 0; + + /* Specify whether this is the last export */ + args->done = 0; + + /* Specify the target we are exporting */ + args->target = target; + + args->compr = false; + args->out[0] = LLVMGetUndef(ctx->ac.f32); + args->out[1] = LLVMGetUndef(ctx->ac.f32); + args->out[2] = LLVMGetUndef(ctx->ac.f32); + args->out[3] = LLVMGetUndef(ctx->ac.f32); + + if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) { + unsigned index = target - V_008DFC_SQ_EXP_MRT; + unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf; + bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1; + bool is_int10 = (ctx->options->key.fs.is_int10 >> index) & 1; + unsigned chan; + + LLVMValueRef (*packf)(struct ac_llvm_context *ctx, LLVMValueRef args[2]) = NULL; + LLVMValueRef (*packi)(struct ac_llvm_context *ctx, LLVMValueRef args[2], + unsigned bits, bool hi) = NULL; + + switch(col_format) { + case V_028714_SPI_SHADER_ZERO: + args->enabled_channels = 0; /* writemask */ + args->target = V_008DFC_SQ_EXP_NULL; + break; + + case V_028714_SPI_SHADER_32_R: + args->enabled_channels = 1; + args->out[0] = values[0]; + break; + + case V_028714_SPI_SHADER_32_GR: + args->enabled_channels = 0x3; + args->out[0] = values[0]; + args->out[1] = values[1]; + break; + + case V_028714_SPI_SHADER_32_AR: + args->enabled_channels = 0x9; + args->out[0] = values[0]; + args->out[3] = values[3]; + break; + + case V_028714_SPI_SHADER_FP16_ABGR: + args->enabled_channels = 0x5; + packf = ac_build_cvt_pkrtz_f16; + break; + + case V_028714_SPI_SHADER_UNORM16_ABGR: + args->enabled_channels = 0x5; + packf = ac_build_cvt_pknorm_u16; + break; + + case V_028714_SPI_SHADER_SNORM16_ABGR: + args->enabled_channels = 0x5; + packf = ac_build_cvt_pknorm_i16; + break; + + case V_028714_SPI_SHADER_UINT16_ABGR: + args->enabled_channels = 0x5; + packi = ac_build_cvt_pk_u16; + break; + + case V_028714_SPI_SHADER_SINT16_ABGR: + args->enabled_channels = 0x5; + packi = ac_build_cvt_pk_i16; + break; + + default: + case V_028714_SPI_SHADER_32_ABGR: + memcpy(&args->out[0], values, sizeof(values[0]) * 4); + break; + } + + /* Pack f16 or norm_i16/u16. */ + if (packf) { + for (chan = 0; chan < 2; chan++) { + LLVMValueRef pack_args[2] = { + values[2 * chan], + values[2 * chan + 1] + }; + LLVMValueRef packed; + + packed = packf(&ctx->ac, pack_args); + args->out[chan] = ac_to_float(&ctx->ac, packed); + } + args->compr = 1; /* COMPR flag */ + } + + /* Pack i16/u16. */ + if (packi) { + for (chan = 0; chan < 2; chan++) { + LLVMValueRef pack_args[2] = { + ac_to_integer(&ctx->ac, values[2 * chan]), + ac_to_integer(&ctx->ac, values[2 * chan + 1]) + }; + LLVMValueRef packed; + + packed = packi(&ctx->ac, pack_args, + is_int8 ? 8 : is_int10 ? 
10 : 16, + chan == 1); + args->out[chan] = ac_to_float(&ctx->ac, packed); + } + args->compr = 1; /* COMPR flag */ + } + return; + } + + memcpy(&args->out[0], values, sizeof(values[0]) * 4); + + for (unsigned i = 0; i < 4; ++i) { + if (!(args->enabled_channels & (1 << i))) + continue; + + args->out[i] = ac_to_float(&ctx->ac, args->out[i]); + } +} + +static void +radv_export_param(struct radv_shader_context *ctx, unsigned index, + LLVMValueRef *values, unsigned enabled_channels) +{ + struct ac_export_args args; + + si_llvm_init_export_args(ctx, values, enabled_channels, + V_008DFC_SQ_EXP_PARAM + index, &args); + ac_build_export(&ctx->ac, &args); +} + +static LLVMValueRef +radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan) +{ + LLVMValueRef output = + ctx->abi.outputs[radeon_llvm_reg_index_soa(index, chan)]; + + return LLVMBuildLoad(ctx->ac.builder, output, ""); +} + +static void +handle_vs_outputs_post(struct radv_shader_context *ctx, + bool export_prim_id, + struct ac_vs_output_info *outinfo) +{ + uint32_t param_count = 0; + unsigned target; + unsigned pos_idx, num_pos_exports = 0; + struct ac_export_args args, pos_args[4] = {}; + LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_index_value = NULL; + int i; + + if (ctx->options->key.has_multiview_view_index) { + LLVMValueRef* tmp_out = &ctx->abi.outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]; + if(!*tmp_out) { + for(unsigned i = 0; i < 4; ++i) + ctx->abi.outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, i)] = + ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, ""); + } + + LLVMBuildStore(ctx->ac.builder, ac_to_float(&ctx->ac, ctx->abi.view_index), *tmp_out); + ctx->output_mask |= 1ull << VARYING_SLOT_LAYER; + } + + memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED, + sizeof(outinfo->vs_output_param_offset)); + + if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) { + LLVMValueRef slots[8]; + unsigned j; + + if (outinfo->cull_dist_mask) + outinfo->cull_dist_mask <<= ctx->num_output_clips; + + i = VARYING_SLOT_CLIP_DIST0; + for (j = 0; j < ctx->num_output_clips + ctx->num_output_culls; j++) + slots[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j)); + + for (i = ctx->num_output_clips + ctx->num_output_culls; i < 8; i++) + slots[i] = LLVMGetUndef(ctx->ac.f32); + + if (ctx->num_output_clips + ctx->num_output_culls > 4) { + target = V_008DFC_SQ_EXP_POS + 3; + si_llvm_init_export_args(ctx, &slots[4], 0xf, target, &args); + memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], + &args, sizeof(args)); + } + + target = V_008DFC_SQ_EXP_POS + 2; + si_llvm_init_export_args(ctx, &slots[0], 0xf, target, &args); + memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS], + &args, sizeof(args)); + + } + + LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_1}; + if (ctx->output_mask & (1ull << VARYING_SLOT_POS)) { + for (unsigned j = 0; j < 4; j++) + pos_values[j] = radv_load_output(ctx, VARYING_SLOT_POS, j); + } + si_llvm_init_export_args(ctx, pos_values, 0xf, V_008DFC_SQ_EXP_POS, &pos_args[0]); + + if (ctx->output_mask & (1ull << VARYING_SLOT_PSIZ)) { + outinfo->writes_pointsize = true; + psize_value = radv_load_output(ctx, VARYING_SLOT_PSIZ, 0); + } + + if (ctx->output_mask & (1ull << VARYING_SLOT_LAYER)) { + outinfo->writes_layer = true; + layer_value = radv_load_output(ctx, VARYING_SLOT_LAYER, 0); + } + + if (ctx->output_mask & (1ull << VARYING_SLOT_VIEWPORT)) { + outinfo->writes_viewport_index = true; + viewport_index_value = radv_load_output(ctx, 
VARYING_SLOT_VIEWPORT, 0); + } + + if (outinfo->writes_pointsize || + outinfo->writes_layer || + outinfo->writes_viewport_index) { + pos_args[1].enabled_channels = ((outinfo->writes_pointsize == true ? 1 : 0) | + (outinfo->writes_layer == true ? 4 : 0)); + pos_args[1].valid_mask = 0; + pos_args[1].done = 0; + pos_args[1].target = V_008DFC_SQ_EXP_POS + 1; + pos_args[1].compr = 0; + pos_args[1].out[0] = ctx->ac.f32_0; /* X */ + pos_args[1].out[1] = ctx->ac.f32_0; /* Y */ + pos_args[1].out[2] = ctx->ac.f32_0; /* Z */ + pos_args[1].out[3] = ctx->ac.f32_0; /* W */ + + if (outinfo->writes_pointsize == true) + pos_args[1].out[0] = psize_value; + if (outinfo->writes_layer == true) + pos_args[1].out[2] = layer_value; + if (outinfo->writes_viewport_index == true) { + if (ctx->options->chip_class >= GFX9) { + /* GFX9 has the layer in out.z[10:0] and the viewport + * index in out.z[19:16]. + */ + LLVMValueRef v = viewport_index_value; + v = ac_to_integer(&ctx->ac, v); + v = LLVMBuildShl(ctx->ac.builder, v, + LLVMConstInt(ctx->ac.i32, 16, false), + ""); + v = LLVMBuildOr(ctx->ac.builder, v, + ac_to_integer(&ctx->ac, pos_args[1].out[2]), ""); + + pos_args[1].out[2] = ac_to_float(&ctx->ac, v); + pos_args[1].enabled_channels |= 1 << 2; + } else { + pos_args[1].out[3] = viewport_index_value; + pos_args[1].enabled_channels |= 1 << 3; + } + } + } + for (i = 0; i < 4; i++) { + if (pos_args[i].out[0]) + num_pos_exports++; + } + + pos_idx = 0; + for (i = 0; i < 4; i++) { + if (!pos_args[i].out[0]) + continue; + + /* Specify the target we are exporting */ + pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++; + if (pos_idx == num_pos_exports) + pos_args[i].done = 1; + ac_build_export(&ctx->ac, &pos_args[i]); + } + + for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { + LLVMValueRef values[4]; + if (!(ctx->output_mask & (1ull << i))) + continue; + + if (i != VARYING_SLOT_LAYER && + i != VARYING_SLOT_PRIMITIVE_ID && + i < VARYING_SLOT_VAR0) + continue; + + for (unsigned j = 0; j < 4; j++) + values[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j)); + + unsigned output_usage_mask; + + if (ctx->stage == MESA_SHADER_VERTEX && + !ctx->is_gs_copy_shader) { + output_usage_mask = + ctx->shader_info->info.vs.output_usage_mask[i]; + } else if (ctx->stage == MESA_SHADER_TESS_EVAL) { + output_usage_mask = + ctx->shader_info->info.tes.output_usage_mask[i]; + } else { + /* Enable all channels for the GS copy shader because + * we don't know the output usage mask currently. 
+ */ + output_usage_mask = 0xf; + } + + radv_export_param(ctx, param_count, values, output_usage_mask); + + outinfo->vs_output_param_offset[i] = param_count++; + } + + if (export_prim_id) { + LLVMValueRef values[4]; + + values[0] = ctx->vs_prim_id; + ctx->shader_info->vs.vgpr_comp_cnt = MAX2(2, + ctx->shader_info->vs.vgpr_comp_cnt); + for (unsigned j = 1; j < 4; j++) + values[j] = ctx->ac.f32_0; + + radv_export_param(ctx, param_count, values, 0xf); + + outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count++; + outinfo->export_prim_id = true; + } + + outinfo->pos_exports = num_pos_exports; + outinfo->param_exports = param_count; +} + +static void +handle_es_outputs_post(struct radv_shader_context *ctx, + struct ac_es_output_info *outinfo) +{ + int j; + uint64_t max_output_written = 0; + LLVMValueRef lds_base = NULL; + + for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { + int param_index; + int length = 4; + + if (!(ctx->output_mask & (1ull << i))) + continue; + + if (i == VARYING_SLOT_CLIP_DIST0) + length = ctx->num_output_clips + ctx->num_output_culls; + + param_index = shader_io_get_unique_index(i); + + max_output_written = MAX2(param_index + (length > 4), max_output_written); + } + + outinfo->esgs_itemsize = (max_output_written + 1) * 16; + + if (ctx->ac.chip_class >= GFX9) { + unsigned itemsize_dw = outinfo->esgs_itemsize / 4; + LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac); + LLVMValueRef wave_idx = ac_build_bfe(&ctx->ac, ctx->merged_wave_info, + LLVMConstInt(ctx->ac.i32, 24, false), + LLVMConstInt(ctx->ac.i32, 4, false), false); + vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx, + LLVMBuildMul(ctx->ac.builder, wave_idx, + LLVMConstInt(ctx->ac.i32, 64, false), ""), ""); + lds_base = LLVMBuildMul(ctx->ac.builder, vertex_idx, + LLVMConstInt(ctx->ac.i32, itemsize_dw, 0), ""); + } + + for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { + LLVMValueRef dw_addr = NULL; + LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4]; + int param_index; + int length = 4; + + if (!(ctx->output_mask & (1ull << i))) + continue; + + if (i == VARYING_SLOT_CLIP_DIST0) + length = ctx->num_output_clips + ctx->num_output_culls; + + param_index = shader_io_get_unique_index(i); + + if (lds_base) { + dw_addr = LLVMBuildAdd(ctx->ac.builder, lds_base, + LLVMConstInt(ctx->ac.i32, param_index * 4, false), + ""); + } + for (j = 0; j < length; j++) { + LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], ""); + out_val = LLVMBuildBitCast(ctx->ac.builder, out_val, ctx->ac.i32, ""); + + if (ctx->ac.chip_class >= GFX9) { + ac_lds_store(&ctx->ac, dw_addr, + LLVMBuildLoad(ctx->ac.builder, out_ptr[j], "")); + dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, ctx->ac.i32_1, ""); + } else { + ac_build_buffer_store_dword(&ctx->ac, + ctx->esgs_ring, + out_val, 1, + NULL, ctx->es2gs_offset, + (4 * param_index + j) * 4, + 1, 1, true, true); + } + } + } +} + +static void +handle_ls_outputs_post(struct radv_shader_context *ctx) +{ + LLVMValueRef vertex_id = ctx->rel_auto_id; + LLVMValueRef vertex_dw_stride = ac_unpack_param(&ctx->ac, ctx->ls_out_layout, 13, 8); + LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id, + vertex_dw_stride, ""); + + for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { + LLVMValueRef *out_ptr = &ctx->abi.outputs[i * 4]; + int length = 4; + + if (!(ctx->output_mask & (1ull << i))) + continue; + + if (i == VARYING_SLOT_CLIP_DIST0) + length = ctx->num_output_clips + ctx->num_output_culls; + int param = shader_io_get_unique_index(i); + 
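+ /* LDS layout sketch (restating the addressing used below): each LS
+ * vertex writes its outputs at
+ * rel_auto_id * vertex_dw_stride + param * 4 + component,
+ * the same per-slot indexing the TCS uses to read them back. */
+ 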
mark_tess_output(ctx, false, param); + if (length > 4) + mark_tess_output(ctx, false, param + 1); + LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr, + LLVMConstInt(ctx->ac.i32, param * 4, false), + ""); + for (unsigned j = 0; j < length; j++) { + ac_lds_store(&ctx->ac, dw_addr, + LLVMBuildLoad(ctx->ac.builder, out_ptr[j], "")); + dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, ctx->ac.i32_1, ""); + } + } +} + +struct ac_build_if_state +{ + struct radv_shader_context *ctx; + LLVMValueRef condition; + LLVMBasicBlockRef entry_block; + LLVMBasicBlockRef true_block; + LLVMBasicBlockRef false_block; + LLVMBasicBlockRef merge_block; +}; + +static LLVMBasicBlockRef +ac_build_insert_new_block(struct radv_shader_context *ctx, const char *name) +{ + LLVMBasicBlockRef current_block; + LLVMBasicBlockRef next_block; + LLVMBasicBlockRef new_block; + + /* get current basic block */ + current_block = LLVMGetInsertBlock(ctx->ac.builder); + + /* chqeck if there's another block after this one */ + next_block = LLVMGetNextBasicBlock(current_block); + if (next_block) { + /* insert the new block before the next block */ + new_block = LLVMInsertBasicBlockInContext(ctx->context, next_block, name); + } + else { + /* append new block after current block */ + LLVMValueRef function = LLVMGetBasicBlockParent(current_block); + new_block = LLVMAppendBasicBlockInContext(ctx->context, function, name); + } + return new_block; +} + +static void +ac_nir_build_if(struct ac_build_if_state *ifthen, + struct radv_shader_context *ctx, + LLVMValueRef condition) +{ + LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->ac.builder); + + memset(ifthen, 0, sizeof *ifthen); + ifthen->ctx = ctx; + ifthen->condition = condition; + ifthen->entry_block = block; + + /* create endif/merge basic block for the phi functions */ + ifthen->merge_block = ac_build_insert_new_block(ctx, "endif-block"); + + /* create/insert true_block before merge_block */ + ifthen->true_block = + LLVMInsertBasicBlockInContext(ctx->context, + ifthen->merge_block, + "if-true-block"); + + /* successive code goes into the true block */ + LLVMPositionBuilderAtEnd(ctx->ac.builder, ifthen->true_block); +} + +/** + * End a conditional. + */ +static void +ac_nir_build_endif(struct ac_build_if_state *ifthen) +{ + LLVMBuilderRef builder = ifthen->ctx->ac.builder; + + /* Insert branch to the merge block from current block */ + LLVMBuildBr(builder, ifthen->merge_block); + + /* + * Now patch in the various branch instructions. 
+	 */
+
+	/* Insert the conditional branch instruction at the end of entry_block */
+	LLVMPositionBuilderAtEnd(builder, ifthen->entry_block);
+	if (ifthen->false_block) {
+		/* we have an else clause */
+		LLVMBuildCondBr(builder, ifthen->condition,
+				ifthen->true_block, ifthen->false_block);
+	}
+	else {
+		/* no else clause */
+		LLVMBuildCondBr(builder, ifthen->condition,
+				ifthen->true_block, ifthen->merge_block);
+	}
+
+	/* Resume building code at end of the ifthen->merge_block */
+	LLVMPositionBuilderAtEnd(builder, ifthen->merge_block);
+}
+
+static void
+write_tess_factors(struct radv_shader_context *ctx)
+{
+	unsigned stride, outer_comps, inner_comps;
+	struct ac_build_if_state if_ctx, inner_if_ctx;
+	LLVMValueRef invocation_id = ac_unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 8, 5);
+	LLVMValueRef rel_patch_id = ac_unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 0, 8);
+	unsigned tess_inner_index = 0, tess_outer_index;
+	LLVMValueRef lds_base, lds_inner = NULL, lds_outer, byteoffset, buffer;
+	LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4];
+	int i;
+	ac_emit_barrier(&ctx->ac, ctx->stage);
+
+	switch (ctx->options->key.tcs.primitive_mode) {
+	case GL_ISOLINES:
+		stride = 2;
+		outer_comps = 2;
+		inner_comps = 0;
+		break;
+	case GL_TRIANGLES:
+		stride = 4;
+		outer_comps = 3;
+		inner_comps = 1;
+		break;
+	case GL_QUADS:
+		stride = 6;
+		outer_comps = 4;
+		inner_comps = 2;
+		break;
+	default:
+		return;
+	}
+
+	ac_nir_build_if(&if_ctx, ctx,
+			LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
+				      invocation_id, ctx->ac.i32_0, ""));
+
+	lds_base = get_tcs_out_current_patch_data_offset(ctx);
+
+	if (inner_comps) {
+		tess_inner_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
+		mark_tess_output(ctx, true, tess_inner_index);
+		lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base,
+					 LLVMConstInt(ctx->ac.i32, tess_inner_index * 4, false), "");
+	}
+
+	tess_outer_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER);
+	mark_tess_output(ctx, true, tess_outer_index);
+	lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base,
+				 LLVMConstInt(ctx->ac.i32, tess_outer_index * 4, false), "");
+
+	for (i = 0; i < 4; i++) {
+		inner[i] = LLVMGetUndef(ctx->ac.i32);
+		outer[i] = LLVMGetUndef(ctx->ac.i32);
+	}
+
+	/* LINES reversal: for isolines the HW expects the outer tess factors
+	 * in reverse order. */
+	if (ctx->options->key.tcs.primitive_mode == GL_ISOLINES) {
+		outer[0] = out[1] = ac_lds_load(&ctx->ac, lds_outer);
+		lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_outer,
+					 ctx->ac.i32_1, "");
+		outer[1] = out[0] = ac_lds_load(&ctx->ac, lds_outer);
+	} else {
+		for (i = 0; i < outer_comps; i++) {
+			outer[i] = out[i] =
+				ac_lds_load(&ctx->ac, lds_outer);
+			lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_outer,
+						 ctx->ac.i32_1, "");
+		}
+		for (i = 0; i < inner_comps; i++) {
+			inner[i] = out[outer_comps+i] =
+				ac_lds_load(&ctx->ac, lds_inner);
+			lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_inner,
+						 ctx->ac.i32_1, "");
+		}
+	}
+
+	/* Convert the outputs to vectors for stores. */
+	vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4));
+	vec1 = NULL;
+
+	if (stride > 4)
+		vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4);
+
+	buffer = ctx->hs_ring_tess_factor;
+	tf_base = ctx->tess_factor_offset;
+	byteoffset = LLVMBuildMul(ctx->ac.builder, rel_patch_id,
+				  LLVMConstInt(ctx->ac.i32, 4 * stride, false), "");
+	unsigned tf_offset = 0;
+
+	if (ctx->options->chip_class <= VI) {
+		ac_nir_build_if(&inner_if_ctx, ctx,
+				LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
+					      rel_patch_id, ctx->ac.i32_0, ""));
+
+		/* Store the dynamic HS control word.
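+		 * Only the first patch (rel_patch_id == 0) writes it; the tess
+		 * factors of every patch are then stored tf_offset = 4 bytes
+		 * further on.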
+		 */
+		ac_build_buffer_store_dword(&ctx->ac, buffer,
+					    LLVMConstInt(ctx->ac.i32, 0x80000000, false),
+					    1, ctx->ac.i32_0, tf_base,
+					    0, 1, 0, true, false);
+		tf_offset += 4;
+
+		ac_nir_build_endif(&inner_if_ctx);
+	}
+
+	/* Store the tessellation factors. */
+	ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
+				    MIN2(stride, 4), byteoffset, tf_base,
+				    tf_offset, 1, 0, true, false);
+	if (vec1)
+		ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
+					    stride - 4, byteoffset, tf_base,
+					    16 + tf_offset, 1, 0, true, false);
+
+	/* Store to offchip for TES to read - only if TES reads them. */
+	if (ctx->options->key.tcs.tes_reads_tess_factors) {
+		LLVMValueRef inner_vec, outer_vec, tf_outer_offset;
+		LLVMValueRef tf_inner_offset;
+		unsigned param_outer, param_inner;
+
+		param_outer = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER);
+		tf_outer_offset = get_tcs_tes_buffer_address(ctx, NULL,
+							     LLVMConstInt(ctx->ac.i32, param_outer, 0));
+
+		outer_vec = ac_build_gather_values(&ctx->ac, outer,
+						   util_next_power_of_two(outer_comps));
+
+		ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, outer_vec,
+					    outer_comps, tf_outer_offset,
+					    ctx->oc_lds, 0, 1, 0, true, false);
+		if (inner_comps) {
+			param_inner = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
+			tf_inner_offset = get_tcs_tes_buffer_address(ctx, NULL,
+								     LLVMConstInt(ctx->ac.i32, param_inner, 0));
+
+			inner_vec = inner_comps == 1 ? inner[0] :
+				ac_build_gather_values(&ctx->ac, inner, inner_comps);
+			ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, inner_vec,
+						    inner_comps, tf_inner_offset,
+						    ctx->oc_lds, 0, 1, 0, true, false);
+		}
+	}
+	ac_nir_build_endif(&if_ctx);
+}
+
+static void
+handle_tcs_outputs_post(struct radv_shader_context *ctx)
+{
+	write_tess_factors(ctx);
+}
+
+static bool
+si_export_mrt_color(struct radv_shader_context *ctx,
+		    LLVMValueRef *color, unsigned index,
+		    struct ac_export_args *args)
+{
+	/* Export */
+	si_llvm_init_export_args(ctx, color, 0xf,
+				 V_008DFC_SQ_EXP_MRT + index, args);
+	if (!args->enabled_channels)
+		return false; /* unnecessary NULL export */
+
+	return true;
+}
+
+static void
+radv_export_mrt_z(struct radv_shader_context *ctx,
+		  LLVMValueRef depth, LLVMValueRef stencil,
+		  LLVMValueRef samplemask)
+{
+	struct ac_export_args args;
+
+	ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args);
+
+	ac_build_export(&ctx->ac, &args);
+}
+
+static void
+handle_fs_outputs_post(struct radv_shader_context *ctx)
+{
+	unsigned index = 0;
+	LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
+	struct ac_export_args color_args[8];
+
+	for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+		LLVMValueRef values[4];
+
+		if (!(ctx->output_mask & (1ull << i)))
+			continue;
+
+		if (i < FRAG_RESULT_DATA0)
+			continue;
+
+		for (unsigned j = 0; j < 4; j++)
+			values[j] = ac_to_float(&ctx->ac,
+						radv_load_output(ctx, i, j));
+
+		bool ret = si_export_mrt_color(ctx, values,
+					       i - FRAG_RESULT_DATA0,
+					       &color_args[index]);
+		if (ret)
+			index++;
+	}
+
+	/* Process depth, stencil, samplemask. */
+	if (ctx->shader_info->info.ps.writes_z) {
+		depth = ac_to_float(&ctx->ac,
+				    radv_load_output(ctx, FRAG_RESULT_DEPTH, 0));
+	}
+	if (ctx->shader_info->info.ps.writes_stencil) {
+		stencil = ac_to_float(&ctx->ac,
+				      radv_load_output(ctx, FRAG_RESULT_STENCIL, 0));
+	}
+	if (ctx->shader_info->info.ps.writes_sample_mask) {
+		samplemask = ac_to_float(&ctx->ac,
+					 radv_load_output(ctx, FRAG_RESULT_SAMPLE_MASK, 0));
+	}
+
+	/* Set the DONE bit on last non-null color export only if Z isn't
+	 * exported.
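+	 * If depth, stencil or sample mask are written, the MRTZ export
+	 * emitted below is the last one and carries the DONE bit instead.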
+	 */
+	if (index > 0 &&
+	    !ctx->shader_info->info.ps.writes_z &&
+	    !ctx->shader_info->info.ps.writes_stencil &&
+	    !ctx->shader_info->info.ps.writes_sample_mask) {
+		unsigned last = index - 1;
+
+		color_args[last].valid_mask = 1; /* whether the EXEC mask is valid */
+		color_args[last].done = 1; /* DONE bit */
+	}
+
+	/* Export PS outputs. */
+	for (unsigned i = 0; i < index; i++)
+		ac_build_export(&ctx->ac, &color_args[i]);
+
+	if (depth || stencil || samplemask)
+		radv_export_mrt_z(ctx, depth, stencil, samplemask);
+	else if (!index)
+		ac_build_export_null(&ctx->ac);
+}
+
+static void
+emit_gs_epilogue(struct radv_shader_context *ctx)
+{
+	ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, ctx->gs_wave_id);
+}
+
+static void
+handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs,
+			   LLVMValueRef *addrs)
+{
+	struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+
+	switch (ctx->stage) {
+	case MESA_SHADER_VERTEX:
+		if (ctx->options->key.vs.as_ls)
+			handle_ls_outputs_post(ctx);
+		else if (ctx->options->key.vs.as_es)
+			handle_es_outputs_post(ctx, &ctx->shader_info->vs.es_info);
+		else
+			handle_vs_outputs_post(ctx, ctx->options->key.vs.export_prim_id,
+					       &ctx->shader_info->vs.outinfo);
+		break;
+	case MESA_SHADER_FRAGMENT:
+		handle_fs_outputs_post(ctx);
+		break;
+	case MESA_SHADER_GEOMETRY:
+		emit_gs_epilogue(ctx);
+		break;
+	case MESA_SHADER_TESS_CTRL:
+		handle_tcs_outputs_post(ctx);
+		break;
+	case MESA_SHADER_TESS_EVAL:
+		if (ctx->options->key.tes.as_es)
+			handle_es_outputs_post(ctx, &ctx->shader_info->tes.es_info);
+		else
+			handle_vs_outputs_post(ctx, ctx->options->key.tes.export_prim_id,
+					       &ctx->shader_info->tes.outinfo);
+		break;
+	default:
+		break;
+	}
+}
+
+static void ac_llvm_finalize_module(struct radv_shader_context *ctx)
+{
+	LLVMPassManagerRef passmgr;
+	/* Create the pass manager */
+	passmgr = LLVMCreateFunctionPassManagerForModule(
+						ctx->ac.module);
+
+	/* This pass should eliminate all the load and store instructions */
+	LLVMAddPromoteMemoryToRegisterPass(passmgr);
+
+	/* Add some optimization passes */
+	LLVMAddScalarReplAggregatesPass(passmgr);
+	LLVMAddLICMPass(passmgr);
+	LLVMAddAggressiveDCEPass(passmgr);
+	LLVMAddCFGSimplificationPass(passmgr);
+	LLVMAddInstructionCombiningPass(passmgr);
+
+	/* Run the passes */
+	LLVMInitializeFunctionPassManager(passmgr);
+	LLVMRunFunctionPassManager(passmgr, ctx->main_function);
+	LLVMFinalizeFunctionPassManager(passmgr);
+
+	LLVMDisposeBuilder(ctx->ac.builder);
+	LLVMDisposePassManager(passmgr);
+
+	ac_llvm_context_dispose(&ctx->ac);
+}
+
+static void
+ac_nir_eliminate_const_vs_outputs(struct radv_shader_context *ctx)
+{
+	struct ac_vs_output_info *outinfo;
+
+	switch (ctx->stage) {
+	case MESA_SHADER_FRAGMENT:
+	case MESA_SHADER_COMPUTE:
+	case MESA_SHADER_TESS_CTRL:
+	case MESA_SHADER_GEOMETRY:
+		return;
+	case MESA_SHADER_VERTEX:
+		if (ctx->options->key.vs.as_ls ||
+		    ctx->options->key.vs.as_es)
+			return;
+		outinfo = &ctx->shader_info->vs.outinfo;
+		break;
+	case MESA_SHADER_TESS_EVAL:
+		if (ctx->options->key.tes.as_es)
+			return;
+		outinfo = &ctx->shader_info->tes.outinfo;
+		break;
+	default:
+		unreachable("Unhandled shader type");
+	}
+
+	ac_optimize_vs_outputs(&ctx->ac,
+			       ctx->main_function,
+			       outinfo->vs_output_param_offset,
+			       VARYING_SLOT_MAX,
+			       &outinfo->param_exports);
+}
+
+static void
+ac_setup_rings(struct radv_shader_context *ctx)
+{
+	if ((ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) ||
+	    (ctx->stage == MESA_SHADER_TESS_EVAL &&
ctx->options->key.tes.as_es)) { + ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_VS, false)); + } + + if (ctx->is_gs_copy_shader) { + ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_GSVS_VS, false)); + } + if (ctx->stage == MESA_SHADER_GEOMETRY) { + LLVMValueRef tmp; + ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_GS, false)); + ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_GSVS_GS, false)); + + ctx->gsvs_ring = LLVMBuildBitCast(ctx->ac.builder, ctx->gsvs_ring, ctx->ac.v4i32, ""); + + ctx->gsvs_ring = LLVMBuildInsertElement(ctx->ac.builder, ctx->gsvs_ring, ctx->gsvs_num_entries, LLVMConstInt(ctx->ac.i32, 2, false), ""); + tmp = LLVMBuildExtractElement(ctx->ac.builder, ctx->gsvs_ring, ctx->ac.i32_1, ""); + tmp = LLVMBuildOr(ctx->ac.builder, tmp, ctx->gsvs_ring_stride, ""); + ctx->gsvs_ring = LLVMBuildInsertElement(ctx->ac.builder, ctx->gsvs_ring, tmp, ctx->ac.i32_1, ""); + } + + if (ctx->stage == MESA_SHADER_TESS_CTRL || + ctx->stage == MESA_SHADER_TESS_EVAL) { + ctx->hs_ring_tess_offchip = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_OFFCHIP, false)); + ctx->hs_ring_tess_factor = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_FACTOR, false)); + } +} + +static unsigned +ac_nir_get_max_workgroup_size(enum chip_class chip_class, + const struct nir_shader *nir) +{ + switch (nir->info.stage) { + case MESA_SHADER_TESS_CTRL: + return chip_class >= CIK ? 128 : 64; + case MESA_SHADER_GEOMETRY: + return chip_class >= GFX9 ? 128 : 64; + case MESA_SHADER_COMPUTE: + break; + default: + return 0; + } + + unsigned max_workgroup_size = nir->info.cs.local_size[0] * + nir->info.cs.local_size[1] * + nir->info.cs.local_size[2]; + return max_workgroup_size; +} + +/* Fixup the HW not emitting the TCS regs if there are no HS threads. 
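+ * With merged LS/HS on GFX9 an empty HS wave leaves the input VGPRs in the
+ * LS layout, so the values the HS expects are selected from their shifted
+ * positions below.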
*/ +static void ac_nir_fixup_ls_hs_input_vgprs(struct radv_shader_context *ctx) +{ + LLVMValueRef count = ac_build_bfe(&ctx->ac, ctx->merged_wave_info, + LLVMConstInt(ctx->ac.i32, 8, false), + LLVMConstInt(ctx->ac.i32, 8, false), false); + LLVMValueRef hs_empty = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, count, + ctx->ac.i32_0, ""); + ctx->abi.instance_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->rel_auto_id, ctx->abi.instance_id, ""); + ctx->vs_prim_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.vertex_id, ctx->vs_prim_id, ""); + ctx->rel_auto_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_rel_ids, ctx->rel_auto_id, ""); + ctx->abi.vertex_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_patch_id, ctx->abi.vertex_id, ""); +} + +static void prepare_gs_input_vgprs(struct radv_shader_context *ctx) +{ + for(int i = 5; i >= 0; --i) { + ctx->gs_vtx_offset[i] = ac_build_bfe(&ctx->ac, ctx->gs_vtx_offset[i & ~1], + LLVMConstInt(ctx->ac.i32, (i & 1) * 16, false), + LLVMConstInt(ctx->ac.i32, 16, false), false); + } + + ctx->gs_wave_id = ac_build_bfe(&ctx->ac, ctx->merged_wave_info, + LLVMConstInt(ctx->ac.i32, 16, false), + LLVMConstInt(ctx->ac.i32, 8, false), false); +} + + +static +LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm, + struct nir_shader *const *shaders, + int shader_count, + struct ac_shader_variant_info *shader_info, + const struct ac_nir_compiler_options *options, + bool dump_shader) +{ + struct radv_shader_context ctx = {0}; + unsigned i; + ctx.options = options; + ctx.shader_info = shader_info; + ctx.context = LLVMContextCreate(); + + ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class, + options->family); + ctx.ac.module = LLVMModuleCreateWithNameInContext("shader", ctx.context); + LLVMSetTarget(ctx.ac.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--"); + + LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm); + char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout); + LLVMSetDataLayout(ctx.ac.module, data_layout_str); + LLVMDisposeTargetData(data_layout); + LLVMDisposeMessage(data_layout_str); + + enum ac_float_mode float_mode = + options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH : + AC_FLOAT_MODE_DEFAULT; + + ctx.ac.builder = ac_create_builder(ctx.context, float_mode); + + memset(shader_info, 0, sizeof(*shader_info)); + + for(int i = 0; i < shader_count; ++i) + ac_nir_shader_info_pass(shaders[i], options, &shader_info->info); + + for (i = 0; i < AC_UD_MAX_SETS; i++) + shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1; + for (i = 0; i < AC_UD_MAX_UD; i++) + shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1; + + ctx.max_workgroup_size = 0; + for (int i = 0; i < shader_count; ++i) { + ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size, + ac_nir_get_max_workgroup_size(ctx.options->chip_class, + shaders[i])); + } + + create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2, + shader_count >= 2 ? 
shaders[shader_count - 2]->info.stage : MESA_SHADER_VERTEX); + + ctx.abi.inputs = &ctx.inputs[0]; + ctx.abi.emit_outputs = handle_shader_outputs_post; + ctx.abi.emit_vertex = visit_emit_vertex; + ctx.abi.load_ubo = radv_load_ubo; + ctx.abi.load_ssbo = radv_load_ssbo; + ctx.abi.load_sampler_desc = radv_get_sampler_desc; + ctx.abi.load_resource = radv_load_resource; + ctx.abi.clamp_shadow_reference = false; + + if (shader_count >= 2) + ac_init_exec_full_mask(&ctx.ac); + + if (ctx.ac.chip_class == GFX9 && + shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL) + ac_nir_fixup_ls_hs_input_vgprs(&ctx); + + for(int i = 0; i < shader_count; ++i) { + ctx.stage = shaders[i]->info.stage; + ctx.output_mask = 0; + ctx.tess_outputs_written = 0; + ctx.num_output_clips = shaders[i]->info.clip_distance_array_size; + ctx.num_output_culls = shaders[i]->info.cull_distance_array_size; + + if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) { + ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, ctx.ac.i32, "gs_next_vertex"); + ctx.gs_max_out_vertices = shaders[i]->info.gs.vertices_out; + ctx.abi.load_inputs = load_gs_input; + ctx.abi.emit_primitive = visit_end_primitive; + } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) { + ctx.tcs_outputs_read = shaders[i]->info.outputs_read; + ctx.tcs_patch_outputs_read = shaders[i]->info.patch_outputs_read; + ctx.abi.load_tess_varyings = load_tcs_varyings; + ctx.abi.load_patch_vertices_in = load_patch_vertices_in; + ctx.abi.store_tcs_outputs = store_tcs_output; + ctx.tcs_vertices_per_patch = shaders[i]->info.tess.tcs_vertices_out; + } else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) { + ctx.tes_primitive_mode = shaders[i]->info.tess.primitive_mode; + ctx.abi.load_tess_varyings = load_tes_input; + ctx.abi.load_tess_coord = load_tess_coord; + ctx.abi.load_patch_vertices_in = load_patch_vertices_in; + ctx.tcs_vertices_per_patch = shaders[i]->info.tess.tcs_vertices_out; + } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) { + if (shader_info->info.vs.needs_instance_id) { + if (ctx.options->key.vs.as_ls) { + ctx.shader_info->vs.vgpr_comp_cnt = + MAX2(2, ctx.shader_info->vs.vgpr_comp_cnt); + } else { + ctx.shader_info->vs.vgpr_comp_cnt = + MAX2(1, ctx.shader_info->vs.vgpr_comp_cnt); + } + } + ctx.abi.load_base_vertex = radv_load_base_vertex; + } else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) { + shader_info->fs.can_discard = shaders[i]->info.fs.uses_discard; + ctx.abi.lookup_interp_param = lookup_interp_param; + ctx.abi.load_sample_position = load_sample_position; + ctx.abi.load_sample_mask_in = load_sample_mask_in; + ctx.abi.emit_kill = radv_emit_kill; + } + + if (i) + ac_emit_barrier(&ctx.ac, ctx.stage); + + ac_setup_rings(&ctx); + + LLVMBasicBlockRef merge_block; + if (shader_count >= 2) { + LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder)); + LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, ""); + merge_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, ""); + + LLVMValueRef count = ac_build_bfe(&ctx.ac, ctx.merged_wave_info, + LLVMConstInt(ctx.ac.i32, 8 * i, false), + LLVMConstInt(ctx.ac.i32, 8, false), false); + LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac); + LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT, + thread_id, count, ""); + LLVMBuildCondBr(ctx.ac.builder, cond, then_block, merge_block); + + LLVMPositionBuilderAtEnd(ctx.ac.builder, then_block); + } + + if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) + handle_fs_inputs(&ctx, shaders[i]); 
+		else if (shaders[i]->info.stage == MESA_SHADER_VERTEX)
+			handle_vs_inputs(&ctx, shaders[i]);
+		else if (shader_count >= 2 && shaders[i]->info.stage == MESA_SHADER_GEOMETRY)
+			prepare_gs_input_vgprs(&ctx);
+
+		nir_foreach_variable(variable, &shaders[i]->outputs)
+			scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage);
+
+		ac_nir_translate(&ctx.ac, &ctx.abi, shaders[i]);
+
+		if (shader_count >= 2) {
+			LLVMBuildBr(ctx.ac.builder, merge_block);
+			LLVMPositionBuilderAtEnd(ctx.ac.builder, merge_block);
+		}
+
+		if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
+			unsigned addclip = shaders[i]->info.clip_distance_array_size +
+					shaders[i]->info.cull_distance_array_size > 4;
+			shader_info->gs.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16;
+			shader_info->gs.max_gsvs_emit_size = shader_info->gs.gsvs_vertex_size *
+				shaders[i]->info.gs.vertices_out;
+		} else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
+			shader_info->tcs.outputs_written = ctx.tess_outputs_written;
+			shader_info->tcs.patch_outputs_written = ctx.tess_patch_outputs_written;
+		} else if (shaders[i]->info.stage == MESA_SHADER_VERTEX && ctx.options->key.vs.as_ls) {
+			shader_info->vs.outputs_written = ctx.tess_outputs_written;
+		}
+	}
+
+	LLVMBuildRetVoid(ctx.ac.builder);
+
+	if (options->dump_preoptir)
+		ac_dump_module(ctx.ac.module);
+
+	ac_llvm_finalize_module(&ctx);
+
+	if (shader_count == 1)
+		ac_nir_eliminate_const_vs_outputs(&ctx);
+
+	if (dump_shader) {
+		ctx.shader_info->private_mem_vgprs =
+			ac_count_scratch_private_memory(ctx.main_function);
+	}
+
+	return ctx.ac.module;
+}
+
+static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
+{
+	unsigned *retval = (unsigned *)context;
+	LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
+	char *description = LLVMGetDiagInfoDescription(di);
+
+	if (severity == LLVMDSError) {
+		*retval = 1;
+		fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n",
+			description);
+	}
+
+	LLVMDisposeMessage(description);
+}
+
+static unsigned ac_llvm_compile(LLVMModuleRef M,
+				struct ac_shader_binary *binary,
+				LLVMTargetMachineRef tm)
+{
+	unsigned retval = 0;
+	char *err;
+	LLVMContextRef llvm_ctx;
+	LLVMMemoryBufferRef out_buffer;
+	unsigned buffer_size;
+	const char *buffer_data;
+	LLVMBool mem_err;
+
+	/* Set up the diagnostic handler. */
+	llvm_ctx = LLVMGetModuleContext(M);
+
+	LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler,
+					&retval);
+
+	/* Compile the IR. */
+	mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile,
+						      &err, &out_buffer);
+
+	/* Process errors/warnings. */
+	if (mem_err) {
+		fprintf(stderr, "%s: %s\n", __FUNCTION__, err);
+		free(err);
+		retval = 1;
+		goto out;
+	}
+
+	/* Extract the shader code. */
+	buffer_size = LLVMGetBufferSize(out_buffer);
+	buffer_data = LLVMGetBufferStart(out_buffer);
+
+	ac_elf_read(buffer_data, buffer_size, binary);
+
+	/* Clean up. */
+	LLVMDisposeMemoryBuffer(out_buffer);
+
+out:
+	return retval;
+}
+
+static void ac_compile_llvm_module(LLVMTargetMachineRef tm,
+				   LLVMModuleRef llvm_module,
+				   struct ac_shader_binary *binary,
+				   struct ac_shader_config *config,
+				   struct ac_shader_variant_info *shader_info,
+				   gl_shader_stage stage,
+				   bool dump_shader, bool supports_spill)
+{
+	if (dump_shader)
+		ac_dump_module(llvm_module);
+
+	memset(binary, 0, sizeof(*binary));
+	int v = ac_llvm_compile(llvm_module, binary, tm);
+	if (v) {
+		fprintf(stderr, "compile failed\n");
+	}
+
+	if (dump_shader)
+		fprintf(stderr, "disasm:\n%s\n", binary->disasm_string);
+
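+	/* Read the shader config (register counts, scratch usage) back from
+	 * the compiled binary. */
+	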
ac_shader_binary_read_config(binary, config, 0, supports_spill); + + LLVMContextRef ctx = LLVMGetModuleContext(llvm_module); + LLVMDisposeModule(llvm_module); + LLVMContextDispose(ctx); + + if (stage == MESA_SHADER_FRAGMENT) { + shader_info->num_input_vgprs = 0; + if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 2; + if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 2; + if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 2; + if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 3; + if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 2; + if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 2; + if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 2; + if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 1; + if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 1; + if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 1; + if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 1; + if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 1; + if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 1; + if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 1; + if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 1; + if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr)) + shader_info->num_input_vgprs += 1; + } + config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs); + + /* +3 for scratch wave offset and VCC */ + config->num_sgprs = MAX2(config->num_sgprs, + shader_info->num_input_sgprs + 3); + + /* Enable 64-bit and 16-bit denormals, because there is no performance + * cost. + * + * If denormals are enabled, all floating-point output modifiers are + * ignored. + * + * Don't enable denormals for 32-bit floats, because: + * - Floating-point output modifiers would be ignored by the hw. + * - Some opcodes don't support denormals, such as v_mad_f32. We would + * have to stop using those. + * - SI & CI would be very slow. 
+ */ + config->float_mode |= V_00B028_FP_64_DENORMS; +} + +static void +ac_fill_shader_info(struct ac_shader_variant_info *shader_info, struct nir_shader *nir, const struct ac_nir_compiler_options *options) +{ + switch (nir->info.stage) { + case MESA_SHADER_COMPUTE: + for (int i = 0; i < 3; ++i) + shader_info->cs.block_size[i] = nir->info.cs.local_size[i]; + break; + case MESA_SHADER_FRAGMENT: + shader_info->fs.early_fragment_test = nir->info.fs.early_fragment_tests; + break; + case MESA_SHADER_GEOMETRY: + shader_info->gs.vertices_in = nir->info.gs.vertices_in; + shader_info->gs.vertices_out = nir->info.gs.vertices_out; + shader_info->gs.output_prim = nir->info.gs.output_primitive; + shader_info->gs.invocations = nir->info.gs.invocations; + break; + case MESA_SHADER_TESS_EVAL: + shader_info->tes.primitive_mode = nir->info.tess.primitive_mode; + shader_info->tes.spacing = nir->info.tess.spacing; + shader_info->tes.ccw = nir->info.tess.ccw; + shader_info->tes.point_mode = nir->info.tess.point_mode; + shader_info->tes.as_es = options->key.tes.as_es; + break; + case MESA_SHADER_TESS_CTRL: + shader_info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out; + break; + case MESA_SHADER_VERTEX: + shader_info->vs.as_es = options->key.vs.as_es; + shader_info->vs.as_ls = options->key.vs.as_ls; + /* in LS mode we need at least 1, invocation id needs 2, handled elsewhere */ + if (options->key.vs.as_ls) + shader_info->vs.vgpr_comp_cnt = MAX2(1, shader_info->vs.vgpr_comp_cnt); + break; + default: + break; + } +} + +void +radv_compile_nir_shader(LLVMTargetMachineRef tm, + struct ac_shader_binary *binary, + struct ac_shader_config *config, + struct ac_shader_variant_info *shader_info, + struct nir_shader *const *nir, + int nir_count, + const struct ac_nir_compiler_options *options, + bool dump_shader) +{ + + LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, nir_count, shader_info, + options, dump_shader); + + ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir[0]->info.stage, dump_shader, options->supports_spill); + for (int i = 0; i < nir_count; ++i) + ac_fill_shader_info(shader_info, nir[i], options); + + /* Determine the ES type (VS or TES) for the GS on GFX9. 
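+	 * ES and GS run merged into a single shader there, so the GS variant
+	 * records which stage fed it.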
*/ + if (options->chip_class == GFX9) { + if (nir_count == 2 && + nir[1]->info.stage == MESA_SHADER_GEOMETRY) { + shader_info->gs.es_type = nir[0]->info.stage; + } + } +} + +static void +ac_gs_copy_shader_emit(struct radv_shader_context *ctx) +{ + LLVMValueRef vtx_offset = + LLVMBuildMul(ctx->ac.builder, ctx->abi.vertex_id, + LLVMConstInt(ctx->ac.i32, 4, false), ""); + int idx = 0; + + for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) { + int length = 4; + int slot = idx; + int slot_inc = 1; + if (!(ctx->output_mask & (1ull << i))) + continue; + + if (i == VARYING_SLOT_CLIP_DIST0) { + /* unpack clip and cull from a single set of slots */ + length = ctx->num_output_clips + ctx->num_output_culls; + if (length > 4) + slot_inc = 2; + } + + for (unsigned j = 0; j < length; j++) { + LLVMValueRef value, soffset; + + soffset = LLVMConstInt(ctx->ac.i32, + (slot * 4 + j) * + ctx->gs_max_out_vertices * 16 * 4, false); + + value = ac_build_buffer_load(&ctx->ac, ctx->gsvs_ring, + 1, ctx->ac.i32_0, + vtx_offset, soffset, + 0, 1, 1, true, false); + + LLVMBuildStore(ctx->ac.builder, + ac_to_float(&ctx->ac, value), ctx->abi.outputs[radeon_llvm_reg_index_soa(i, j)]); + } + idx += slot_inc; + } + handle_vs_outputs_post(ctx, false, &ctx->shader_info->vs.outinfo); +} + +void +radv_compile_gs_copy_shader(LLVMTargetMachineRef tm, + struct nir_shader *geom_shader, + struct ac_shader_binary *binary, + struct ac_shader_config *config, + struct ac_shader_variant_info *shader_info, + const struct ac_nir_compiler_options *options, + bool dump_shader) +{ + struct radv_shader_context ctx = {0}; + ctx.context = LLVMContextCreate(); + ctx.options = options; + ctx.shader_info = shader_info; + + ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class, + options->family); + ctx.ac.module = LLVMModuleCreateWithNameInContext("shader", ctx.context); + + ctx.is_gs_copy_shader = true; + LLVMSetTarget(ctx.ac.module, "amdgcn--"); + + enum ac_float_mode float_mode = + options->unsafe_math ? 
AC_FLOAT_MODE_UNSAFE_FP_MATH : + AC_FLOAT_MODE_DEFAULT; + + ctx.ac.builder = ac_create_builder(ctx.context, float_mode); + ctx.stage = MESA_SHADER_VERTEX; + + create_function(&ctx, MESA_SHADER_VERTEX, false, MESA_SHADER_VERTEX); + + ctx.gs_max_out_vertices = geom_shader->info.gs.vertices_out; + ac_setup_rings(&ctx); + + ctx.num_output_clips = geom_shader->info.clip_distance_array_size; + ctx.num_output_culls = geom_shader->info.cull_distance_array_size; + + nir_foreach_variable(variable, &geom_shader->outputs) { + scan_shader_output_decl(&ctx, variable, geom_shader, MESA_SHADER_VERTEX); + ac_handle_shader_output_decl(&ctx.ac, &ctx.abi, geom_shader, + variable, MESA_SHADER_VERTEX); + } + + ac_gs_copy_shader_emit(&ctx); + + LLVMBuildRetVoid(ctx.ac.builder); + + ac_llvm_finalize_module(&ctx); + + ac_compile_llvm_module(tm, ctx.ac.module, binary, config, shader_info, + MESA_SHADER_VERTEX, + dump_shader, options->supports_spill); +} diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 439522585a7..22850c81d68 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1677,6 +1677,24 @@ struct radv_fence { uint32_t temp_syncobj; }; +/* radv_nir_to_llvm.c */ +void radv_compile_gs_copy_shader(LLVMTargetMachineRef tm, + struct nir_shader *geom_shader, + struct ac_shader_binary *binary, + struct ac_shader_config *config, + struct ac_shader_variant_info *shader_info, + const struct ac_nir_compiler_options *options, + bool dump_shader); + +void radv_compile_nir_shader(LLVMTargetMachineRef tm, + struct ac_shader_binary *binary, + struct ac_shader_config *config, + struct ac_shader_variant_info *shader_info, + struct nir_shader *const *nir, + int nir_count, + const struct ac_nir_compiler_options *options, + bool dump_shader); + struct radeon_winsys_sem; #define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType) \ diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 85672e600d7..1a5e76b8cfb 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -473,12 +473,13 @@ shader_variant_create(struct radv_device *device, if (gs_copy_shader) { assert(shader_count == 1); - ac_create_gs_copy_shader(tm, *shaders, &binary, &variant->config, - &variant->info, options, dump_shaders); + radv_compile_gs_copy_shader(tm, *shaders, &binary, + &variant->config, &variant->info, + options, dump_shaders); } else { - ac_compile_nir_shader(tm, &binary, &variant->config, - &variant->info, shaders, shader_count, options, - dump_shaders); + radv_compile_nir_shader(tm, &binary, &variant->config, + &variant->info, shaders, shader_count, + options, dump_shaders); } LLVMDisposeTargetMachine(tm); -- 2.30.2