X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_shader_tgsi_mem.c;h=3295d75fe7fef749339a3d22d64609ad9fa09275;hb=98b88cc1f62a6044d25d439dff6fb46c9f5025ae;hp=ec11c75b888c9d9ae489940ec66591f72608f7f3;hpb=854593b8eb2cf27d1666fd0ca1931eee80750019;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index ec11c75b888..3295d75fe7f 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -1,5 +1,6 @@ /* * Copyright 2017 Advanced Micro Devices, Inc. + * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -21,21 +22,19 @@ * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include + #include "si_shader_internal.h" #include "si_pipe.h" #include "sid.h" -#include "gallivm/lp_bld_arit.h" -#include "gallivm/lp_bld_gather.h" -#include "gallivm/lp_bld_intr.h" #include "tgsi/tgsi_build.h" -#include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" +#include "ac_llvm_util.h" -static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data); - -static const struct lp_build_tgsi_action tex_action; +static void tex_fetch_ptrs(struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data, + LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, + LLVMValueRef *fmask_ptr); /** * Given a v8i32 resource descriptor for a buffer, extract the size of the @@ -51,8 +50,8 @@ static LLVMValueRef get_buffer_size( LLVMBuildExtractElement(builder, descriptor, LLVMConstInt(ctx->i32, 2, 0), ""); - if (ctx->screen->b.chip_class == VI) { - /* On VI, the descriptor contains the size in bytes, + if (ctx->screen->info.chip_class == GFX8) { + /* On GFX8, the descriptor contains the size in bytes, * but TXQ must return the size in elements. * The stride is always non-zero for resources using TXQ. 
*/ @@ -90,14 +89,64 @@ shader_buffer_fetch_rsrc(struct si_shader_context *ctx, return ctx->abi.load_ssbo(&ctx->abi, index, false); } -static bool tgsi_is_array_image(unsigned target) +static enum ac_image_dim +ac_texture_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type target) +{ + switch (target) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + if (screen->info.chip_class == GFX9) + return ac_image_2d; + return ac_image_1d; + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOWRECT: + return ac_image_2d; + case TGSI_TEXTURE_3D: + return ac_image_3d; + case TGSI_TEXTURE_CUBE: + case TGSI_TEXTURE_SHADOWCUBE: + case TGSI_TEXTURE_CUBE_ARRAY: + case TGSI_TEXTURE_SHADOWCUBE_ARRAY: + return ac_image_cube; + case TGSI_TEXTURE_1D_ARRAY: + case TGSI_TEXTURE_SHADOW1D_ARRAY: + if (screen->info.chip_class == GFX9) + return ac_image_2darray; + return ac_image_1darray; + case TGSI_TEXTURE_2D_ARRAY: + case TGSI_TEXTURE_SHADOW2D_ARRAY: + return ac_image_2darray; + case TGSI_TEXTURE_2D_MSAA: + return ac_image_2dmsaa; + case TGSI_TEXTURE_2D_ARRAY_MSAA: + return ac_image_2darraymsaa; + default: + unreachable("unhandled texture type"); + } +} + +static enum ac_image_dim +ac_image_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type target) { - return target == TGSI_TEXTURE_3D || - target == TGSI_TEXTURE_CUBE || - target == TGSI_TEXTURE_1D_ARRAY || - target == TGSI_TEXTURE_2D_ARRAY || - target == TGSI_TEXTURE_CUBE_ARRAY || - target == TGSI_TEXTURE_2D_ARRAY_MSAA; + enum ac_image_dim dim = ac_texture_dim_from_tgsi_target(screen, target); + + /* Match the resource type set in the descriptor. */ + if (dim == ac_image_cube || + (screen->info.chip_class <= GFX8 && dim == ac_image_3d)) + dim = ac_image_2darray; + else if (target == TGSI_TEXTURE_2D && screen->info.chip_class == GFX9) { + /* When a single layer of a 3D texture is bound, the shader + * will refer to a 2D target, but the descriptor has a 3D type. + * Since the HW ignores BASE_ARRAY in this case, we need to + * send 3 coordinates. This doesn't hurt when the underlying + * texture is non-3D. + */ + dim = ac_image_3d; + } + + return dim; } /** @@ -114,7 +163,7 @@ static bool tgsi_is_array_image(unsigned target) static LLVMValueRef force_dcc_off(struct si_shader_context *ctx, LLVMValueRef rsrc) { - if (ctx->screen->b.chip_class <= CIK) { + if (ctx->screen->info.chip_class <= GFX7) { return rsrc; } else { LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0); @@ -127,26 +176,32 @@ static LLVMValueRef force_dcc_off(struct si_shader_context *ctx, } } +/* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should + * adjust "index" to point to FMASK. 
*/ LLVMValueRef si_load_image_desc(struct si_shader_context *ctx, LLVMValueRef list, LLVMValueRef index, - enum ac_descriptor_type desc_type, bool dcc_off) + enum ac_descriptor_type desc_type, + bool uses_store, bool bindless) { LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef rsrc; if (desc_type == AC_DESC_BUFFER) { - index = LLVMBuildMul(builder, index, - LLVMConstInt(ctx->i32, 2, 0), ""); - index = LLVMBuildAdd(builder, index, - ctx->i32_1, ""); + index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0), + ctx->i32_1); list = LLVMBuildPointerCast(builder, list, - si_const_array(ctx->v4i32, 0), ""); + ac_array_in_const32_addr_space(ctx->v4i32), ""); } else { - assert(desc_type == AC_DESC_IMAGE); + assert(desc_type == AC_DESC_IMAGE || + desc_type == AC_DESC_FMASK); } - rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index); - if (dcc_off) + if (bindless) + rsrc = ac_build_load_to_sgpr_uint_wraparound(&ctx->ac, list, index); + else + rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index); + + if (desc_type == AC_DESC_IMAGE && uses_store) rsrc = force_dcc_off(ctx, rsrc); return rsrc; } @@ -158,74 +213,72 @@ static void image_fetch_rsrc( struct lp_build_tgsi_context *bld_base, const struct tgsi_full_src_register *image, - bool is_store, unsigned target, + bool fmask, bool is_store, unsigned target, LLVMValueRef *rsrc) { struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn, - ctx->param_samplers_and_images); - LLVMValueRef index; - bool dcc_off = is_store; + bool bindless = image->Register.File != TGSI_FILE_IMAGE; + LLVMValueRef rsrc_ptr, index; - if (!image->Register.Indirect) { - const struct tgsi_shader_info *info = bld_base->info; - unsigned images_writemask = info->images_store | - info->images_atomic; - - index = LLVMConstInt(ctx->i32, - si_get_image_slot(image->Register.Index), 0); - - if (images_writemask & (1 << image->Register.Index)) - dcc_off = true; - } else { - /* From the GL_ARB_shader_image_load_store extension spec: - * - * If a shader performs an image load, store, or atomic - * operation using an image variable declared as an array, - * and if the index used to select an individual element is - * negative or greater than or equal to the size of the - * array, the results of the operation are undefined but may - * not lead to termination. - */ - index = si_get_bounded_indirect_index(ctx, &image->Indirect, - image->Register.Index, - ctx->num_images); - index = LLVMBuildSub(ctx->ac.builder, - LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0), - index, ""); - } - - if (image->Register.File != TGSI_FILE_IMAGE) { + if (bindless) { /* Bindless descriptors are accessible from a different pair of * user SGPR indices. */ rsrc_ptr = LLVMGetParam(ctx->main_fn, ctx->param_bindless_samplers_and_images); - index = lp_build_emit_fetch_src(bld_base, image, - TGSI_TYPE_UNSIGNED, 0); + index = lp_build_emit_fetch_src(bld_base, image, TGSI_TYPE_UNSIGNED, 0); - /* For simplicity, bindless image descriptors use fixed - * 16-dword slots for now. - */ + /* Bindless image descriptors use 16-dword slots. */ index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->i32, 2, 0), ""); + /* FMASK is right after the image. 
*/ + if (fmask) + index = LLVMBuildAdd(ctx->ac.builder, index, ctx->i32_1, ""); + } else { + rsrc_ptr = LLVMGetParam(ctx->main_fn, ctx->param_samplers_and_images); + + if (!image->Register.Indirect) { + index = LLVMConstInt(ctx->i32, image->Register.Index, 0); + } else { + /* From the GL_ARB_shader_image_load_store extension spec: + * + * If a shader performs an image load, store, or atomic + * operation using an image variable declared as an array, + * and if the index used to select an individual element is + * negative or greater than or equal to the size of the + * array, the results of the operation are undefined but may + * not lead to termination. + */ + index = si_get_bounded_indirect_index(ctx, &image->Indirect, + image->Register.Index, + ctx->num_images); + } + /* FMASKs are separate from images. */ + if (fmask) { + index = LLVMBuildAdd(ctx->ac.builder, index, + LLVMConstInt(ctx->i32, SI_NUM_IMAGES, 0), ""); + } + index = LLVMBuildSub(ctx->ac.builder, + LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS - 1, 0), + index, ""); } *rsrc = si_load_image_desc(ctx, rsrc_ptr, index, + fmask ? AC_DESC_FMASK : target == TGSI_TEXTURE_BUFFER ? AC_DESC_BUFFER : AC_DESC_IMAGE, - dcc_off); + is_store, bindless); } -static LLVMValueRef image_fetch_coords( +static void image_fetch_coords( struct lp_build_tgsi_context *bld_base, const struct tgsi_full_instruction *inst, - unsigned src, LLVMValueRef desc) + unsigned src, LLVMValueRef desc, + LLVMValueRef *coords) { struct si_shader_context *ctx = si_shader_context(bld_base); LLVMBuilderRef builder = ctx->ac.builder; unsigned target = inst->Memory.Texture; unsigned num_coords = tgsi_util_get_texture_coord_dim(target); - LLVMValueRef coords[4]; LLVMValueRef tmp; int chan; @@ -235,15 +288,20 @@ static LLVMValueRef image_fetch_coords( coords[chan] = tmp; } - if (ctx->screen->b.chip_class >= GFX9) { + if (target == TGSI_TEXTURE_2D_MSAA || + target == TGSI_TEXTURE_2D_ARRAY_MSAA) { + /* Need the sample index as well. */ + tmp = lp_build_emit_fetch(bld_base, inst, src, TGSI_SWIZZLE_W); + coords[chan] = ac_to_integer(&ctx->ac, tmp); + } + + if (ctx->screen->info.chip_class == GFX9) { /* 1D textures are allocated and used as 2D on GFX9. */ if (target == TGSI_TEXTURE_1D) { coords[1] = ctx->i32_0; - num_coords++; } else if (target == TGSI_TEXTURE_1D_ARRAY) { coords[2] = coords[1]; coords[1] = ctx->i32_0; - num_coords++; } else if (target == TGSI_TEXTURE_2D) { /* The hw can't bind a slice of a 3D image as a 2D * image, because it ignores BASE_ARRAY if the target @@ -258,184 +316,35 @@ static LLVMValueRef image_fetch_coords( first_layer = LLVMBuildAnd(builder, first_layer, mask, ""); coords[2] = first_layer; - num_coords++; - } - } - - if (num_coords == 1) - return coords[0]; - - if (num_coords == 3) { - /* LLVM has difficulties lowering 3-element vectors. */ - coords[3] = bld_base->uint_bld.undef; - num_coords = 4; - } - - return lp_build_gather_values(&ctx->gallivm, coords, num_coords); -} - -/** - * Append the extra mode bits that are used by image load and store. - */ -static void image_append_args( - struct si_shader_context *ctx, - struct lp_build_emit_data * emit_data, - unsigned target, - bool atomic, - bool force_glc) -{ - const struct tgsi_full_instruction *inst = emit_data->inst; - LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0); - LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0); - LLVMValueRef r128 = i1false; - LLVMValueRef da = tgsi_is_array_image(target) ? 
i1true : i1false; - LLVMValueRef glc = - force_glc || - inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ? - i1true : i1false; - LLVMValueRef slc = i1false; - LLVMValueRef lwe = i1false; - - if (atomic || (HAVE_LLVM <= 0x0309)) { - emit_data->args[emit_data->arg_count++] = r128; - emit_data->args[emit_data->arg_count++] = da; - if (!atomic) { - emit_data->args[emit_data->arg_count++] = glc; } - emit_data->args[emit_data->arg_count++] = slc; - return; - } - - /* HAVE_LLVM >= 0x0400 */ - emit_data->args[emit_data->arg_count++] = glc; - emit_data->args[emit_data->arg_count++] = slc; - emit_data->args[emit_data->arg_count++] = lwe; - emit_data->args[emit_data->arg_count++] = da; -} - -/** - * Append the resource and indexing arguments for buffer intrinsics. - * - * \param rsrc the v4i32 buffer resource - * \param index index into the buffer (stride-based) - * \param offset byte offset into the buffer - */ -static void buffer_append_args( - struct si_shader_context *ctx, - struct lp_build_emit_data *emit_data, - LLVMValueRef rsrc, - LLVMValueRef index, - LLVMValueRef offset, - bool atomic, - bool force_glc) -{ - const struct tgsi_full_instruction *inst = emit_data->inst; - LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0); - LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0); - - emit_data->args[emit_data->arg_count++] = rsrc; - emit_data->args[emit_data->arg_count++] = index; /* vindex */ - emit_data->args[emit_data->arg_count++] = offset; /* voffset */ - if (!atomic) { - emit_data->args[emit_data->arg_count++] = - force_glc || - inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ? - i1true : i1false; /* glc */ } - emit_data->args[emit_data->arg_count++] = i1false; /* slc */ } -static void load_fetch_args( - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) +static unsigned get_cache_policy(struct si_shader_context *ctx, + const struct tgsi_full_instruction *inst, + bool atomic, bool may_store_unaligned, + bool writeonly_memory) { - struct si_shader_context *ctx = si_shader_context(bld_base); - const struct tgsi_full_instruction * inst = emit_data->inst; - unsigned target = inst->Memory.Texture; - LLVMValueRef rsrc; - - emit_data->dst_type = ctx->v4f32; - - if (inst->Src[0].Register.File == TGSI_FILE_BUFFER || - inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) { - LLVMValueRef offset; - LLVMValueRef tmp; - - bool ubo = inst->Src[0].Register.File == TGSI_FILE_CONSTBUF; - rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], ubo); - - tmp = lp_build_emit_fetch(bld_base, inst, 1, 0); - offset = ac_to_integer(&ctx->ac, tmp); - - buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0, - offset, false, false); - } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE || - tgsi_is_bindless_image_file(inst->Src[0].Register.File)) { - LLVMValueRef coords; - - image_fetch_rsrc(bld_base, &inst->Src[0], false, target, &rsrc); - coords = image_fetch_coords(bld_base, inst, 1, rsrc); - - if (target == TGSI_TEXTURE_BUFFER) { - buffer_append_args(ctx, emit_data, rsrc, coords, - ctx->i32_0, false, false); - } else { - emit_data->args[0] = coords; - emit_data->args[1] = rsrc; - emit_data->args[2] = LLVMConstInt(ctx->i32, 15, 0); /* dmask */ - emit_data->arg_count = 3; - - image_append_args(ctx, emit_data, target, false, false); - } + unsigned cache_policy = 0; + + if (!atomic && + /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. + * All store opcodes not aligned to a dword are affected. 
+ * The only way to get unaligned stores in radeonsi is through + * shader images. */ + ((may_store_unaligned && ctx->screen->info.chip_class == GFX6) || + /* If this is write-only, don't keep data in L1 to prevent + * evicting L1 cache lines that may be needed by other + * instructions. */ + writeonly_memory || + inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE))) { + cache_policy |= ac_glc; } -} - -static unsigned get_load_intr_attribs(bool can_speculate) -{ - /* READNONE means writes can't affect it, while READONLY means that - * writes can affect it. */ - return can_speculate && HAVE_LLVM >= 0x0400 ? - LP_FUNC_ATTR_READNONE : - LP_FUNC_ATTR_READONLY; -} - -static unsigned get_store_intr_attribs(bool writeonly_memory) -{ - return writeonly_memory && HAVE_LLVM >= 0x0400 ? - LP_FUNC_ATTR_INACCESSIBLE_MEM_ONLY : - LP_FUNC_ATTR_WRITEONLY; -} -static void load_emit_buffer(struct si_shader_context *ctx, - struct lp_build_emit_data *emit_data, - bool can_speculate, bool allow_smem) -{ - const struct tgsi_full_instruction *inst = emit_data->inst; - uint writemask = inst->Dst[0].Register.WriteMask; - uint count = util_last_bit(writemask); - LLVMValueRef *args = emit_data->args; - - /* Don't use SMEM for shader buffer loads, because LLVM doesn't - * select SMEM for SI.load.const with a non-constant offset, and - * constant offsets practically don't exist with shader buffers. - * - * Also, SI.load.const doesn't use inst_offset when it's lowered - * to VMEM, so we just end up with more VALU instructions in the end - * and no benefit. - * - * TODO: Remove this line once LLVM can select SMEM with a non-constant - * offset, and can derive inst_offset when VMEM is selected. - * After that, si_memory_barrier should invalidate sL1 for shader - * buffers. - */ + if (inst->Memory.Qualifier & TGSI_MEMORY_STREAM_CACHE_POLICY) + cache_policy |= ac_slc; - assert(LLVMConstIntGetZExtValue(args[1]) == 0); /* vindex */ - emit_data->output[emit_data->chan] = - ac_build_buffer_load(&ctx->ac, args[0], count, NULL, - args[2], NULL, 0, - LLVMConstIntGetZExtValue(args[3]), - LLVMConstIntGetZExtValue(args[4]), - can_speculate, allow_smem); + return cache_policy; } static LLVMValueRef get_memory_ptr(struct si_shader_context *ctx, @@ -449,7 +358,7 @@ static LLVMValueRef get_memory_ptr(struct si_shader_context *ctx, offset = lp_build_emit_fetch(&ctx->bld_base, inst, arg, 0); offset = ac_to_integer(&ctx->ac, offset); - ptr = ctx->shared_memory; + ptr = ctx->ac.lds; ptr = LLVMBuildGEP(builder, ptr, &offset, 1, ""); addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, addr_space), ""); @@ -478,7 +387,7 @@ static void load_emit_memory( derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, ""); channels[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, ""); } - emit_data->output[emit_data->chan] = lp_build_gather_values(&ctx->gallivm, channels, 4); + emit_data->output[emit_data->chan] = ac_build_gather_values(&ctx->ac, channels, 4); } /** @@ -489,20 +398,53 @@ static void load_emit_memory( * For LOAD, set this to (store | atomic) slot usage in the shader. * For STORE, set this to (load | atomic) slot usage in the shader. * \param images_reverse_access_mask Same as above, but for images. + * \param bindless_buffer_reverse_access_mask Same as above, but for bindless image buffers. + * \param bindless_image_reverse_access_mask Same as above, but for bindless images. 
*/ static bool is_oneway_access_only(const struct tgsi_full_instruction *inst, const struct tgsi_shader_info *info, unsigned shader_buffers_reverse_access_mask, - unsigned images_reverse_access_mask) + unsigned images_reverse_access_mask, + bool bindless_buffer_reverse_access_mask, + bool bindless_image_reverse_access_mask) { + enum tgsi_file_type resource_file; + unsigned resource_index; + bool resource_indirect; + + if (inst->Instruction.Opcode == TGSI_OPCODE_STORE) { + resource_file = inst->Dst[0].Register.File; + resource_index = inst->Dst[0].Register.Index; + resource_indirect = inst->Dst[0].Register.Indirect; + } else { + resource_file = inst->Src[0].Register.File; + resource_index = inst->Src[0].Register.Index; + resource_indirect = inst->Src[0].Register.Indirect; + } + + assert(resource_file == TGSI_FILE_BUFFER || + resource_file == TGSI_FILE_IMAGE || + /* bindless image */ + resource_file == TGSI_FILE_INPUT || + resource_file == TGSI_FILE_OUTPUT || + resource_file == TGSI_FILE_CONSTANT || + resource_file == TGSI_FILE_TEMPORARY || + resource_file == TGSI_FILE_IMMEDIATE); + + assert(resource_file != TGSI_FILE_BUFFER || + inst->Memory.Texture == TGSI_TEXTURE_BUFFER); + + bool bindless = resource_file != TGSI_FILE_BUFFER && + resource_file != TGSI_FILE_IMAGE; + /* RESTRICT means NOALIAS. * If there are no writes, we can assume the accessed memory is read-only. * If there are no reads, we can assume the accessed memory is write-only. */ - if (inst->Memory.Qualifier & TGSI_MEMORY_RESTRICT) { + if (inst->Memory.Qualifier & TGSI_MEMORY_RESTRICT && !bindless) { unsigned reverse_access_mask; - if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { + if (resource_file == TGSI_FILE_BUFFER) { reverse_access_mask = shader_buffers_reverse_access_mask; } else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { reverse_access_mask = info->images_buffers & @@ -512,12 +454,12 @@ static bool is_oneway_access_only(const struct tgsi_full_instruction *inst, images_reverse_access_mask; } - if (inst->Src[0].Register.Indirect) { + if (resource_indirect) { if (!reverse_access_mask) return true; } else { if (!(reverse_access_mask & - (1u << inst->Src[0].Register.Index))) + (1u << resource_index))) return true; } } @@ -530,15 +472,15 @@ static bool is_oneway_access_only(const struct tgsi_full_instruction *inst, * Same for the case when there are no writes/reads for non-buffer * images. 
*/ - if (inst->Src[0].Register.File == TGSI_FILE_BUFFER || - (inst->Memory.Texture == TGSI_TEXTURE_BUFFER && - (inst->Src[0].Register.File == TGSI_FILE_IMAGE || - tgsi_is_bindless_image_file(inst->Src[0].Register.File)))) { + if (resource_file == TGSI_FILE_BUFFER || + inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { if (!shader_buffers_reverse_access_mask && - !(info->images_buffers & images_reverse_access_mask)) + !(info->images_buffers & images_reverse_access_mask) && + !bindless_buffer_reverse_access_mask) return true; } else { - if (!(~info->images_buffers & images_reverse_access_mask)) + if (!(~info->images_buffers & images_reverse_access_mask) && + !bindless_image_reverse_access_mask) return true; } return false; @@ -550,191 +492,165 @@ static void load_emit( struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMBuilderRef builder = ctx->ac.builder; const struct tgsi_full_instruction * inst = emit_data->inst; const struct tgsi_shader_info *info = &ctx->shader->selector->info; - char intrinsic_name[64]; bool can_speculate = false; + LLVMValueRef vindex = ctx->i32_0; + LLVMValueRef voffset = ctx->i32_0; + struct ac_image_args args = {}; if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) { load_emit_memory(ctx, emit_data); return; } + if (inst->Src[0].Register.File == TGSI_FILE_BUFFER || + inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) { + bool ubo = inst->Src[0].Register.File == TGSI_FILE_CONSTBUF; + args.resource = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], ubo); + voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 1, 0)); + } else { + unsigned target = inst->Memory.Texture; + + image_fetch_rsrc(bld_base, &inst->Src[0], false, false, target, &args.resource); + image_fetch_coords(bld_base, inst, 1, args.resource, args.coords); + + if ((inst->Memory.Texture == TGSI_TEXTURE_2D_MSAA || + inst->Memory.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA) && + !(ctx->screen->debug_flags & DBG(NO_FMASK))) { + LLVMValueRef fmask; + + image_fetch_rsrc(bld_base, &inst->Src[0], true, false, target, &fmask); + ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords, + inst->Memory.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA); + } + vindex = args.coords[0]; /* for buffers only */ + } + if (inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) { - load_emit_buffer(ctx, emit_data, true, true); + emit_data->output[emit_data->chan] = + ac_build_buffer_load(&ctx->ac, args.resource, + util_last_bit(inst->Dst[0].Register.WriteMask), + NULL, voffset, NULL, 0, 0, true, true); return; } if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) - si_emit_waitcnt(ctx, VM_CNT); + ac_build_waitcnt(&ctx->ac, AC_WAIT_VLOAD | AC_WAIT_VSTORE); can_speculate = !(inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) && is_oneway_access_only(inst, info, info->shader_buffers_store | info->shader_buffers_atomic, info->images_store | - info->images_atomic); + info->images_atomic, + info->uses_bindless_buffer_store | + info->uses_bindless_buffer_atomic, + info->uses_bindless_image_store | + info->uses_bindless_image_atomic); + args.cache_policy = get_cache_policy(ctx, inst, false, false, false); if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { - load_emit_buffer(ctx, emit_data, can_speculate, false); + /* Don't use SMEM for shader buffer loads, because LLVM doesn't + * select SMEM for SI.load.const with a non-constant offset, and + * constant offsets practically don't exist with shader buffers. 
+ * + * Also, SI.load.const doesn't use inst_offset when it's lowered + * to VMEM, so we just end up with more VALU instructions in the end + * and no benefit. + * + * TODO: Remove this line once LLVM can select SMEM with a non-constant + * offset, and can derive inst_offset when VMEM is selected. + * After that, si_memory_barrier should invalidate sL1 for shader + * buffers. + */ + emit_data->output[emit_data->chan] = + ac_build_buffer_load(&ctx->ac, args.resource, + util_last_bit(inst->Dst[0].Register.WriteMask), + NULL, voffset, NULL, 0, + args.cache_policy, can_speculate, false); return; } if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { + unsigned num_channels = util_last_bit(inst->Dst[0].Register.WriteMask); + LLVMValueRef result = + ac_build_buffer_load_format(&ctx->ac, + args.resource, + vindex, + ctx->i32_0, + num_channels, + args.cache_policy, + can_speculate); emit_data->output[emit_data->chan] = - lp_build_intrinsic( - builder, "llvm.amdgcn.buffer.load.format.v4f32", emit_data->dst_type, - emit_data->args, emit_data->arg_count, - get_load_intr_attribs(can_speculate)); + ac_build_expand_to_vec4(&ctx->ac, result, num_channels); } else { - ac_get_image_intr_name("llvm.amdgcn.image.load", - emit_data->dst_type, /* vdata */ - LLVMTypeOf(emit_data->args[0]), /* coords */ - LLVMTypeOf(emit_data->args[1]), /* rsrc */ - intrinsic_name, sizeof(intrinsic_name)); + args.opcode = ac_image_load; + args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture); + args.attributes = ac_get_load_intr_attribs(can_speculate); + args.dmask = 0xf; emit_data->output[emit_data->chan] = - lp_build_intrinsic( - builder, intrinsic_name, emit_data->dst_type, - emit_data->args, emit_data->arg_count, - get_load_intr_attribs(can_speculate)); - } -} - -static void store_fetch_args( - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - const struct tgsi_full_instruction * inst = emit_data->inst; - struct tgsi_full_src_register memory; - LLVMValueRef chans[4]; - LLVMValueRef data; - LLVMValueRef rsrc; - unsigned chan; - - emit_data->dst_type = ctx->voidt; - - for (chan = 0; chan < 4; ++chan) { - chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan); - } - data = lp_build_gather_values(&ctx->gallivm, chans, 4); - - emit_data->args[emit_data->arg_count++] = data; - - memory = tgsi_full_src_register_from_dst(&inst->Dst[0]); - - if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) { - LLVMValueRef offset; - LLVMValueRef tmp; - - rsrc = shader_buffer_fetch_rsrc(ctx, &memory, false); - - tmp = lp_build_emit_fetch(bld_base, inst, 0, 0); - offset = ac_to_integer(&ctx->ac, tmp); - - buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0, - offset, false, false); - } else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE || - tgsi_is_bindless_image_file(inst->Dst[0].Register.File)) { - unsigned target = inst->Memory.Texture; - LLVMValueRef coords; - - /* 8bit/16bit TC L1 write corruption bug on SI. - * All store opcodes not aligned to a dword are affected. - * - * The only way to get unaligned stores in radeonsi is through - * shader images. 
- */ - bool force_glc = ctx->screen->b.chip_class == SI; - - image_fetch_rsrc(bld_base, &memory, true, target, &rsrc); - coords = image_fetch_coords(bld_base, inst, 0, rsrc); - - if (target == TGSI_TEXTURE_BUFFER) { - buffer_append_args(ctx, emit_data, rsrc, coords, - ctx->i32_0, false, force_glc); - } else { - emit_data->args[1] = coords; - emit_data->args[2] = rsrc; - emit_data->args[3] = LLVMConstInt(ctx->i32, 15, 0); /* dmask */ - emit_data->arg_count = 4; - - image_append_args(ctx, emit_data, target, false, force_glc); - } + ac_build_image_opcode(&ctx->ac, &args); } } -static void store_emit_buffer( - struct si_shader_context *ctx, - struct lp_build_emit_data *emit_data, - bool writeonly_memory) +static void store_emit_buffer(struct si_shader_context *ctx, + LLVMValueRef resource, + unsigned writemask, + LLVMValueRef value, + LLVMValueRef voffset, + unsigned cache_policy, + bool writeonly_memory) { - const struct tgsi_full_instruction *inst = emit_data->inst; LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef base_data = emit_data->args[0]; - LLVMValueRef base_offset = emit_data->args[3]; - unsigned writemask = inst->Dst[0].Register.WriteMask; + LLVMValueRef base_data = value; + LLVMValueRef base_offset = voffset; while (writemask) { int start, count; - const char *intrinsic_name; - LLVMValueRef data; - LLVMValueRef offset; - LLVMValueRef tmp; + LLVMValueRef data, voff; u_bit_scan_consecutive_range(&writemask, &start, &count); - /* Due to an LLVM limitation, split 3-element writes - * into a 2-element and a 1-element write. */ - if (count == 3) { - writemask |= 1 << (start + 2); - count = 2; - } - - if (count == 4) { + if (count == 3 && ac_has_vec3_support(ctx->ac.chip_class, false)) { + LLVMValueRef values[3] = { + LLVMBuildExtractElement(builder, base_data, + LLVMConstInt(ctx->i32, start, 0), ""), + LLVMBuildExtractElement(builder, base_data, + LLVMConstInt(ctx->i32, start + 1, 0), ""), + LLVMBuildExtractElement(builder, base_data, + LLVMConstInt(ctx->i32, start + 2, 0), ""), + }; + data = ac_build_gather_values(&ctx->ac, values, 3); + } else if (count >= 3) { data = base_data; - intrinsic_name = "llvm.amdgcn.buffer.store.v4f32"; } else if (count == 2) { - LLVMTypeRef v2f32 = LLVMVectorType(ctx->f32, 2); - - tmp = LLVMBuildExtractElement( - builder, base_data, - LLVMConstInt(ctx->i32, start, 0), ""); - data = LLVMBuildInsertElement( - builder, LLVMGetUndef(v2f32), tmp, - ctx->i32_0, ""); - - tmp = LLVMBuildExtractElement( - builder, base_data, - LLVMConstInt(ctx->i32, start + 1, 0), ""); - data = LLVMBuildInsertElement( - builder, data, tmp, ctx->i32_1, ""); - - intrinsic_name = "llvm.amdgcn.buffer.store.v2f32"; + LLVMValueRef values[2] = { + LLVMBuildExtractElement(builder, base_data, + LLVMConstInt(ctx->i32, start, 0), ""), + LLVMBuildExtractElement(builder, base_data, + LLVMConstInt(ctx->i32, start + 1, 0), ""), + }; + + data = ac_build_gather_values(&ctx->ac, values, 2); } else { assert(count == 1); data = LLVMBuildExtractElement( builder, base_data, LLVMConstInt(ctx->i32, start, 0), ""); - intrinsic_name = "llvm.amdgcn.buffer.store.f32"; } - offset = base_offset; + voff = base_offset; if (start != 0) { - offset = LLVMBuildAdd( - builder, offset, + voff = LLVMBuildAdd( + builder, voff, LLVMConstInt(ctx->i32, start * 4, 0), ""); } - emit_data->args[0] = data; - emit_data->args[3] = offset; - - lp_build_intrinsic( - builder, intrinsic_name, emit_data->dst_type, - emit_data->args, emit_data->arg_count, - get_store_intr_attribs(writeonly_memory)); + 
ac_build_buffer_store_dword(&ctx->ac, resource, data, count, + voff, ctx->i32_0, 0, cache_policy, + false); } } @@ -767,107 +683,76 @@ static void store_emit( struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMBuilderRef builder = ctx->ac.builder; const struct tgsi_full_instruction * inst = emit_data->inst; const struct tgsi_shader_info *info = &ctx->shader->selector->info; + struct tgsi_full_src_register resource_reg = + tgsi_full_src_register_from_dst(&inst->Dst[0]); unsigned target = inst->Memory.Texture; - char intrinsic_name[64]; - bool writeonly_memory = false; if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) { store_emit_memory(ctx, emit_data); return; } - if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) - si_emit_waitcnt(ctx, VM_CNT); + bool writeonly_memory = is_oneway_access_only(inst, info, + info->shader_buffers_load | + info->shader_buffers_atomic, + info->images_load | + info->images_atomic, + info->uses_bindless_buffer_load | + info->uses_bindless_buffer_atomic, + info->uses_bindless_image_load | + info->uses_bindless_image_atomic); + LLVMValueRef chans[4]; + LLVMValueRef vindex = ctx->i32_0; + LLVMValueRef voffset = ctx->i32_0; + struct ac_image_args args = {}; - writeonly_memory = is_oneway_access_only(inst, info, - info->shader_buffers_load | - info->shader_buffers_atomic, - info->images_load | - info->images_atomic); + for (unsigned chan = 0; chan < 4; ++chan) + chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan); if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) { - store_emit_buffer(ctx, emit_data, writeonly_memory); - return; - } - - if (target == TGSI_TEXTURE_BUFFER) { - emit_data->output[emit_data->chan] = lp_build_intrinsic( - builder, "llvm.amdgcn.buffer.store.format.v4f32", - emit_data->dst_type, emit_data->args, - emit_data->arg_count, - get_store_intr_attribs(writeonly_memory)); + args.resource = shader_buffer_fetch_rsrc(ctx, &resource_reg, false); + voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 0, 0)); } else { - ac_get_image_intr_name("llvm.amdgcn.image.store", - LLVMTypeOf(emit_data->args[0]), /* vdata */ - LLVMTypeOf(emit_data->args[1]), /* coords */ - LLVMTypeOf(emit_data->args[2]), /* rsrc */ - intrinsic_name, sizeof(intrinsic_name)); - - emit_data->output[emit_data->chan] = - lp_build_intrinsic( - builder, intrinsic_name, emit_data->dst_type, - emit_data->args, emit_data->arg_count, - get_store_intr_attribs(writeonly_memory)); + image_fetch_rsrc(bld_base, &resource_reg, false, true, target, &args.resource); + image_fetch_coords(bld_base, inst, 0, args.resource, args.coords); + vindex = args.coords[0]; /* for buffers only */ } -} - -static void atomic_fetch_args( - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - const struct tgsi_full_instruction * inst = emit_data->inst; - LLVMValueRef data1, data2; - LLVMValueRef rsrc; - LLVMValueRef tmp; - emit_data->dst_type = ctx->f32; + if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) + ac_build_waitcnt(&ctx->ac, AC_WAIT_VLOAD | AC_WAIT_VSTORE); - tmp = lp_build_emit_fetch(bld_base, inst, 2, 0); - data1 = ac_to_integer(&ctx->ac, tmp); + bool is_image = inst->Dst[0].Register.File != TGSI_FILE_BUFFER; + args.cache_policy = get_cache_policy(ctx, inst, + false, /* atomic */ + is_image, /* may_store_unaligned */ + writeonly_memory); - if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { - tmp = lp_build_emit_fetch(bld_base, 
inst, 3, 0); - data2 = ac_to_integer(&ctx->ac, tmp); + if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) { + store_emit_buffer(ctx, args.resource, inst->Dst[0].Register.WriteMask, + ac_build_gather_values(&ctx->ac, chans, 4), + voffset, args.cache_policy, writeonly_memory); + return; } - /* llvm.amdgcn.image/buffer.atomic.cmpswap reflect the hardware order - * of arguments, which is reversed relative to TGSI (and GLSL) - */ - if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) - emit_data->args[emit_data->arg_count++] = data2; - emit_data->args[emit_data->arg_count++] = data1; - - if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { - LLVMValueRef offset; - - rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false); - - tmp = lp_build_emit_fetch(bld_base, inst, 1, 0); - offset = ac_to_integer(&ctx->ac, tmp); - - buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0, - offset, true, false); - } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE || - tgsi_is_bindless_image_file(inst->Src[0].Register.File)) { - unsigned target = inst->Memory.Texture; - LLVMValueRef coords; - - image_fetch_rsrc(bld_base, &inst->Src[0], true, target, &rsrc); - coords = image_fetch_coords(bld_base, inst, 1, rsrc); + if (target == TGSI_TEXTURE_BUFFER) { + unsigned num_channels = util_last_bit(inst->Dst[0].Register.WriteMask); - if (target == TGSI_TEXTURE_BUFFER) { - buffer_append_args(ctx, emit_data, rsrc, coords, - ctx->i32_0, true, false); - } else { - emit_data->args[emit_data->arg_count++] = coords; - emit_data->args[emit_data->arg_count++] = rsrc; + ac_build_buffer_store_format(&ctx->ac, args.resource, + ac_build_gather_values(&ctx->ac, chans, num_channels), + vindex, ctx->i32_0 /* voffset */, + num_channels, + args.cache_policy); + } else { + args.opcode = ac_image_store; + args.data[0] = ac_build_gather_values(&ctx->ac, chans, 4); + args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture); + args.attributes = AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY; + args.dmask = 0xf; - image_append_args(ctx, emit_data, target, true, false); - } + emit_data->output[emit_data->chan] = + ac_build_image_opcode(&ctx->ac, &args); } } @@ -876,6 +761,7 @@ static void atomic_emit_memory(struct si_shader_context *ctx, LLVMBuilderRef builder = ctx->ac.builder; const struct tgsi_full_instruction * inst = emit_data->inst; LLVMValueRef ptr, result, arg; + const char *sync_scope = LLVM_VERSION_MAJOR >= 9 ? 
"workgroup-one-as" : "workgroup"; ptr = get_memory_ptr(ctx, inst, ctx->i32, 1); @@ -889,11 +775,8 @@ static void atomic_emit_memory(struct si_shader_context *ctx, new_data = ac_to_integer(&ctx->ac, new_data); - result = LLVMBuildAtomicCmpXchg(builder, ptr, arg, new_data, - LLVMAtomicOrderingSequentiallyConsistent, - LLVMAtomicOrderingSequentiallyConsistent, - false); - + result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, arg, new_data, + sync_scope); result = LLVMBuildExtractValue(builder, result, 0, ""); } else { LLVMAtomicRMWBinOp op; @@ -930,11 +813,10 @@ static void atomic_emit_memory(struct si_shader_context *ctx, unreachable("unknown atomic opcode"); } - result = LLVMBuildAtomicRMW(builder, op, ptr, arg, - LLVMAtomicOrderingSequentiallyConsistent, - false); + result = ac_build_atomic_rmw(&ctx->ac, op, ptr, arg, sync_scope); } - emit_data->output[emit_data->chan] = LLVMBuildBitCast(builder, result, emit_data->dst_type, ""); + emit_data->output[emit_data->chan] = + LLVMBuildBitCast(builder, result, ctx->f32, ""); } static void atomic_emit( @@ -943,69 +825,114 @@ static void atomic_emit( struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMBuilderRef builder = ctx->ac.builder; const struct tgsi_full_instruction * inst = emit_data->inst; - char intrinsic_name[40]; - LLVMValueRef tmp; + struct ac_image_args args = {}; + unsigned num_data = 0; + LLVMValueRef vindex = ctx->i32_0; + LLVMValueRef voffset = ctx->i32_0; if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) { atomic_emit_memory(ctx, emit_data); return; } - if (inst->Src[0].Register.File == TGSI_FILE_BUFFER || - inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { - snprintf(intrinsic_name, sizeof(intrinsic_name), - "llvm.amdgcn.buffer.atomic.%s", action->intr_name); + if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { + /* llvm.amdgcn.image/buffer.atomic.cmpswap reflect the hardware order + * of arguments, which is reversed relative to TGSI (and GLSL) + */ + args.data[num_data++] = + ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 3, 0)); + } + + args.data[num_data++] = + ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 2, 0)); + + args.cache_policy = get_cache_policy(ctx, inst, true, false, false); + + if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { + args.resource = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false); + voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 1, 0)); } else { - LLVMValueRef coords; - char coords_type[8]; + image_fetch_rsrc(bld_base, &inst->Src[0], false, true, + inst->Memory.Texture, &args.resource); + image_fetch_coords(bld_base, inst, 1, args.resource, args.coords); + vindex = args.coords[0]; /* for buffers only */ + } + + if (inst->Src[0].Register.File != TGSI_FILE_BUFFER && + inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { + LLVMValueRef buf_args[7]; + unsigned num_args = 0; + buf_args[num_args++] = args.data[0]; if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) - coords = emit_data->args[2]; - else - coords = emit_data->args[1]; + buf_args[num_args++] = args.data[1]; - ac_build_type_name_for_intr(LLVMTypeOf(coords), coords_type, sizeof(coords_type)); + buf_args[num_args++] = args.resource; + buf_args[num_args++] = vindex; + buf_args[num_args++] = voffset; + buf_args[num_args++] = ctx->i32_0; /* soffset */ + buf_args[num_args++] = LLVMConstInt(ctx->i32, args.cache_policy & ac_slc, 0); + + char intrinsic_name[64]; snprintf(intrinsic_name, sizeof(intrinsic_name), - "llvm.amdgcn.image.atomic.%s.%s", - 
action->intr_name, coords_type); + "llvm.amdgcn.struct.buffer.atomic.%s", action->intr_name); + emit_data->output[emit_data->chan] = + ac_to_float(&ctx->ac, + ac_build_intrinsic(&ctx->ac, intrinsic_name, + ctx->i32, buf_args, num_args, 0)); + return; } - tmp = lp_build_intrinsic( - builder, intrinsic_name, ctx->i32, - emit_data->args, emit_data->arg_count, 0); - emit_data->output[emit_data->chan] = ac_to_float(&ctx->ac, tmp); -} - -static void set_tex_fetch_args(struct si_shader_context *ctx, - struct lp_build_emit_data *emit_data, - unsigned target, - LLVMValueRef res_ptr, LLVMValueRef samp_ptr, - LLVMValueRef *param, unsigned count, - unsigned dmask) -{ - struct ac_image_args args = {}; + if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { + LLVMValueRef buf_args[7]; + unsigned num_args = 0; - /* Pad to power of two vector */ - while (count < util_next_power_of_two(count)) - param[count++] = LLVMGetUndef(ctx->i32); + buf_args[num_args++] = args.data[0]; + if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) + buf_args[num_args++] = args.data[1]; - if (count > 1) - args.addr = lp_build_gather_values(&ctx->gallivm, param, count); - else - args.addr = param[0]; + buf_args[num_args++] = args.resource; + buf_args[num_args++] = vindex; + buf_args[num_args++] = voffset; + buf_args[num_args++] = args.cache_policy & ac_slc ? ctx->i1true : ctx->i1false; - args.resource = res_ptr; - args.sampler = samp_ptr; - args.dmask = dmask; - args.unorm = target == TGSI_TEXTURE_RECT || - target == TGSI_TEXTURE_SHADOWRECT; - args.da = tgsi_is_array_sampler(target); + char intrinsic_name[40]; + snprintf(intrinsic_name, sizeof(intrinsic_name), + "llvm.amdgcn.buffer.atomic.%s", action->intr_name); + emit_data->output[emit_data->chan] = + ac_to_float(&ctx->ac, + ac_build_intrinsic(&ctx->ac, intrinsic_name, + ctx->i32, buf_args, num_args, 0)); + } else { + if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { + args.opcode = ac_image_atomic_cmpswap; + } else { + args.opcode = ac_image_atomic; + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ATOMXCHG: args.atomic = ac_atomic_swap; break; + case TGSI_OPCODE_ATOMUADD: args.atomic = ac_atomic_add; break; + case TGSI_OPCODE_ATOMAND: args.atomic = ac_atomic_and; break; + case TGSI_OPCODE_ATOMOR: args.atomic = ac_atomic_or; break; + case TGSI_OPCODE_ATOMXOR: args.atomic = ac_atomic_xor; break; + case TGSI_OPCODE_ATOMUMIN: args.atomic = ac_atomic_umin; break; + case TGSI_OPCODE_ATOMUMAX: args.atomic = ac_atomic_umax; break; + case TGSI_OPCODE_ATOMIMIN: args.atomic = ac_atomic_smin; break; + case TGSI_OPCODE_ATOMIMAX: args.atomic = ac_atomic_smax; break; + case TGSI_OPCODE_ATOMINC_WRAP: + args.atomic = ac_atomic_inc_wrap; + break; + case TGSI_OPCODE_ATOMDEC_WRAP: + args.atomic = ac_atomic_dec_wrap; + break; + default: unreachable("unhandled image atomic"); + } + } - /* Ugly, but we seem to have no other choice right now. */ - STATIC_ASSERT(sizeof(args) <= sizeof(emit_data->args)); - memcpy(emit_data->args, &args, sizeof(args)); + args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture); + emit_data->output[emit_data->chan] = + ac_to_float(&ctx->ac, ac_build_image_opcode(&ctx->ac, &args)); + } } static LLVMValueRef fix_resinfo(struct si_shader_context *ctx, @@ -1014,7 +941,7 @@ static LLVMValueRef fix_resinfo(struct si_shader_context *ctx, LLVMBuilderRef builder = ctx->ac.builder; /* 1D textures are allocated and used as 2D on GFX9. 
*/ - if (ctx->screen->b.chip_class >= GFX9 && + if (ctx->screen->info.chip_class == GFX9 && (target == TGSI_TEXTURE_1D_ARRAY || target == TGSI_TEXTURE_SHADOW1D_ARRAY)) { LLVMValueRef layers = @@ -1037,66 +964,87 @@ static LLVMValueRef fix_resinfo(struct si_shader_context *ctx, return out; } -static void resq_fetch_args( - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) +static void resq_emit( + const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); + LLVMBuilderRef builder = ctx->ac.builder; const struct tgsi_full_instruction *inst = emit_data->inst; - const struct tgsi_full_src_register *reg = &inst->Src[0]; - - emit_data->dst_type = ctx->v4i32; + const struct tgsi_full_src_register *reg = + &inst->Src[inst->Instruction.Opcode == TGSI_OPCODE_TXQ ? 1 : 0]; if (reg->Register.File == TGSI_FILE_BUFFER) { - emit_data->args[0] = shader_buffer_fetch_rsrc(ctx, reg, false); - emit_data->arg_count = 1; - } else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { - image_fetch_rsrc(bld_base, reg, false, inst->Memory.Texture, - &emit_data->args[0]); - emit_data->arg_count = 1; - } else { - LLVMValueRef res_ptr; - unsigned image_target; - - if (inst->Memory.Texture == TGSI_TEXTURE_3D) - image_target = TGSI_TEXTURE_2D_ARRAY; - else - image_target = inst->Memory.Texture; + LLVMValueRef rsrc = shader_buffer_fetch_rsrc(ctx, reg, false); - image_fetch_rsrc(bld_base, reg, false, inst->Memory.Texture, - &res_ptr); - set_tex_fetch_args(ctx, emit_data, image_target, - res_ptr, NULL, &ctx->i32_0, 1, - 0xf); + emit_data->output[emit_data->chan] = + LLVMBuildExtractElement(builder, rsrc, + LLVMConstInt(ctx->i32, 2, 0), ""); + return; } -} -static void resq_emit( - const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMBuilderRef builder = ctx->ac.builder; - const struct tgsi_full_instruction *inst = emit_data->inst; - LLVMValueRef out; + if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ && + inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { + LLVMValueRef rsrc; - if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { - out = LLVMBuildExtractElement(builder, emit_data->args[0], - LLVMConstInt(ctx->i32, 2, 0), ""); - } else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { - out = get_buffer_size(bld_base, emit_data->args[0]); + tex_fetch_ptrs(bld_base, emit_data, &rsrc, NULL, NULL); + /* Read the size from the buffer descriptor directly. 
*/ + emit_data->output[emit_data->chan] = + get_buffer_size(bld_base, rsrc); + return; + } + + if (inst->Instruction.Opcode == TGSI_OPCODE_RESQ && + inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { + LLVMValueRef rsrc; + + image_fetch_rsrc(bld_base, reg, false, false, inst->Memory.Texture, &rsrc); + emit_data->output[emit_data->chan] = + get_buffer_size(bld_base, rsrc); + return; + } + + unsigned target; + + if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) { + target = inst->Texture.Texture; } else { - struct ac_image_args args; + if (inst->Memory.Texture == TGSI_TEXTURE_3D) + target = TGSI_TEXTURE_2D_ARRAY; + else + target = inst->Memory.Texture; + } - memcpy(&args, emit_data->args, sizeof(args)); /* ugly */ - args.opcode = ac_image_get_resinfo; - out = ac_build_image_opcode(&ctx->ac, &args); + struct ac_image_args args = {}; + args.opcode = ac_image_get_resinfo; + args.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target); + args.dmask = 0xf; + args.attributes = AC_FUNC_ATTR_READNONE; - out = fix_resinfo(ctx, inst->Memory.Texture, out); + if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) { + tex_fetch_ptrs(bld_base, emit_data, &args.resource, NULL, NULL); + args.lod = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X); + } else { + image_fetch_rsrc(bld_base, reg, false, false, target, &args.resource); + args.lod = ctx->i32_0; } - emit_data->output[emit_data->chan] = out; + emit_data->output[emit_data->chan] = + fix_resinfo(ctx, target, ac_build_image_opcode(&ctx->ac, &args)); + + if (inst->Instruction.Opcode == TGSI_OPCODE_RESQ && + (target == TGSI_TEXTURE_2D_MSAA || + target == TGSI_TEXTURE_2D_ARRAY_MSAA)) { + LLVMValueRef samples = + ac_build_image_get_sample_count(&ctx->ac, args.resource); + + emit_data->output[emit_data->chan] = + LLVMBuildInsertElement(ctx->ac.builder, + emit_data->output[emit_data->chan], + samples, + LLVMConstInt(ctx->i32, 3, 0), ""); + } } /** @@ -1115,23 +1063,30 @@ LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, break; case AC_DESC_BUFFER: /* The buffer is in [4:7]. */ - index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), ""); - index = LLVMBuildAdd(builder, index, ctx->i32_1, ""); + index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0), + ctx->i32_1); list = LLVMBuildPointerCast(builder, list, - si_const_array(ctx->v4i32, 0), ""); + ac_array_in_const32_addr_space(ctx->v4i32), ""); break; case AC_DESC_FMASK: /* The FMASK is at [8:15]. */ - index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), ""); - index = LLVMBuildAdd(builder, index, ctx->i32_1, ""); + index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0), + ctx->i32_1); break; case AC_DESC_SAMPLER: /* The sampler state is at [12:15]. */ - index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), ""); - index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 3, 0), ""); + index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0), + LLVMConstInt(ctx->i32, 3, 0)); list = LLVMBuildPointerCast(builder, list, - si_const_array(ctx->v4i32, 0), ""); + ac_array_in_const32_addr_space(ctx->v4i32), ""); break; + case AC_DESC_PLANE_0: + case AC_DESC_PLANE_1: + case AC_DESC_PLANE_2: + /* Only used for the multiplane image support for Vulkan. Should + * never be reached in radeonsi. 
+ */ + unreachable("Plane descriptor requested in radeonsi."); } return ac_build_load_to_sgpr(&ctx->ac, list, index); @@ -1139,13 +1094,13 @@ LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL. * - * SI-CI: + * GFX6-GFX7: * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic * filtering manually. The driver sets img7 to a mask clearing * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do: * s_and_b32 samp0, samp0, img7 * - * VI: + * GFX8: * The ANISO_OVERRIDE sampler field enables this fix in TA. */ static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx, @@ -1153,7 +1108,7 @@ static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx, { LLVMValueRef img7, samp0; - if (ctx->screen->b.chip_class >= VI) + if (ctx->screen->info.chip_class >= GFX8) return samp; img7 = LLVMBuildExtractElement(ctx->ac.builder, res, @@ -1165,10 +1120,10 @@ static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx, ctx->i32_0, ""); } -static void tex_fetch_ptrs( - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data, - LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr) +static void tex_fetch_ptrs(struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data, + LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, + LLVMValueRef *fmask_ptr) { struct si_shader_context *ctx = si_shader_context(bld_base); LLVMValueRef list = LLVMGetParam(ctx->main_fn, ctx->param_samplers_and_images); @@ -1187,7 +1142,7 @@ static void tex_fetch_ptrs( reg->Register.Index, ctx->num_samplers); index = LLVMBuildAdd(ctx->ac.builder, index, - LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), ""); + LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS / 2, 0), ""); } else { index = LLVMConstInt(ctx->i32, si_get_sampler_slot(reg->Register.Index), 0); @@ -1201,6 +1156,15 @@ static void tex_fetch_ptrs( ctx->param_bindless_samplers_and_images); index = lp_build_emit_fetch_src(bld_base, reg, TGSI_TYPE_UNSIGNED, 0); + + /* Since bindless handle arithmetic can contain an unsigned integer + * wraparound and si_load_sampler_desc assumes there isn't any, + * use GEP without "inbounds" (inside ac_build_pointer_add) + * to prevent incorrect code generation and hangs. + */ + index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->i32, 2, 0), ""); + list = ac_build_pointer_add(&ctx->ac, list, index); + index = ctx->i32_0; } if (target == TGSI_TEXTURE_BUFFER) @@ -1227,98 +1191,202 @@ static void tex_fetch_ptrs( } } -static void txq_fetch_args( - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) +/* Gather4 should follow the same rules as bilinear filtering, but the hardware + * incorrectly forces nearest filtering if the texture format is integer. + * The only effect it has on Gather4, which always returns 4 texels for + * bilinear filtering, is that the final coordinates are off by 0.5 of + * the texel size. + * + * The workaround is to subtract 0.5 from the unnormalized coordinates, + * or (0.5 / size) from the normalized coordinates. + * + * However, cube textures with 8_8_8_8 data formats require a different + * workaround of overriding the num format to USCALED/SSCALED. This would lose + * precision in 32-bit data formats, so it needs to be applied dynamically at + * runtime. In this case, return an i1 value that indicates whether the + * descriptor was overridden (and hence a fixup of the sampler result is needed). 
+ */ +static LLVMValueRef +si_lower_gather4_integer(struct si_shader_context *ctx, + struct ac_image_args *args, + unsigned target, + enum tgsi_return_type return_type) { - struct si_shader_context *ctx = si_shader_context(bld_base); - const struct tgsi_full_instruction *inst = emit_data->inst; - unsigned target = inst->Texture.Texture; - LLVMValueRef res_ptr; - LLVMValueRef address; + LLVMBuilderRef builder = ctx->ac.builder; + LLVMValueRef wa_8888 = NULL; + LLVMValueRef half_texel[2]; - tex_fetch_ptrs(bld_base, emit_data, &res_ptr, NULL, NULL); + assert(return_type == TGSI_RETURN_TYPE_SINT || + return_type == TGSI_RETURN_TYPE_UINT); - if (target == TGSI_TEXTURE_BUFFER) { - /* Read the size from the buffer descriptor directly. */ - emit_data->args[0] = get_buffer_size(bld_base, res_ptr); - return; + if (target == TGSI_TEXTURE_CUBE || + target == TGSI_TEXTURE_CUBE_ARRAY) { + LLVMValueRef formats; + LLVMValueRef data_format; + LLVMValueRef wa_formats; + + formats = LLVMBuildExtractElement(builder, args->resource, ctx->i32_1, ""); + + data_format = LLVMBuildLShr(builder, formats, + LLVMConstInt(ctx->i32, 20, false), ""); + data_format = LLVMBuildAnd(builder, data_format, + LLVMConstInt(ctx->i32, (1u << 6) - 1, false), ""); + wa_8888 = LLVMBuildICmp( + builder, LLVMIntEQ, data_format, + LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), + ""); + + uint32_t wa_num_format = + return_type == TGSI_RETURN_TYPE_UINT ? + S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_USCALED) : + S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_SSCALED); + wa_formats = LLVMBuildAnd(builder, formats, + LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT, false), + ""); + wa_formats = LLVMBuildOr(builder, wa_formats, + LLVMConstInt(ctx->i32, wa_num_format, false), ""); + + formats = LLVMBuildSelect(builder, wa_8888, wa_formats, formats, ""); + args->resource = LLVMBuildInsertElement( + builder, args->resource, formats, ctx->i32_1, ""); } - /* Textures - set the mip level. */ - address = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X); + if (target == TGSI_TEXTURE_RECT || + target == TGSI_TEXTURE_SHADOWRECT) { + assert(!wa_8888); + half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5); + } else { + struct ac_image_args resinfo = {}; + struct lp_build_if_state if_ctx; + + if (wa_8888) { + /* Skip the texture size query entirely if we don't need it. */ + lp_build_if(&if_ctx, &ctx->gallivm, LLVMBuildNot(builder, wa_8888, "")); + } + + /* Query the texture size. */ + resinfo.opcode = ac_image_get_resinfo; + resinfo.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target); + resinfo.resource = args->resource; + resinfo.sampler = args->sampler; + resinfo.lod = ctx->ac.i32_0; + resinfo.dmask = 0xf; + resinfo.attributes = AC_FUNC_ATTR_READNONE; + + LLVMValueRef texsize = + fix_resinfo(ctx, target, + ac_build_image_opcode(&ctx->ac, &resinfo)); + + /* Compute -0.5 / size. 
*/ + for (unsigned c = 0; c < 2; c++) { + half_texel[c] = + LLVMBuildExtractElement(builder, texsize, + LLVMConstInt(ctx->i32, c, 0), ""); + half_texel[c] = LLVMBuildUIToFP(builder, half_texel[c], ctx->f32, ""); + half_texel[c] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, half_texel[c]); + half_texel[c] = LLVMBuildFMul(builder, half_texel[c], + LLVMConstReal(ctx->f32, -0.5), ""); + } - set_tex_fetch_args(ctx, emit_data, target, res_ptr, - NULL, &address, 1, 0xf); + if (wa_8888) { + lp_build_endif(&if_ctx); + + LLVMBasicBlockRef bb[2] = { if_ctx.true_block, if_ctx.entry_block }; + + for (unsigned c = 0; c < 2; c++) { + LLVMValueRef values[2] = { half_texel[c], ctx->ac.f32_0 }; + half_texel[c] = ac_build_phi(&ctx->ac, ctx->f32, 2, + values, bb); + } + } + } + + for (unsigned c = 0; c < 2; c++) { + LLVMValueRef tmp; + tmp = ac_to_float(&ctx->ac, args->coords[c]); + tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], ""); + args->coords[c] = ac_to_integer(&ctx->ac, tmp); + } + + return wa_8888; } -static void txq_emit(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) +/* The second half of the cube texture 8_8_8_8 integer workaround: adjust the + * result after the gather operation. + */ +static LLVMValueRef +si_fix_gather4_integer_result(struct si_shader_context *ctx, + LLVMValueRef result, + enum tgsi_return_type return_type, + LLVMValueRef wa) { - struct si_shader_context *ctx = si_shader_context(bld_base); - struct ac_image_args args; - unsigned target = emit_data->inst->Texture.Texture; + LLVMBuilderRef builder = ctx->ac.builder; - if (target == TGSI_TEXTURE_BUFFER) { - /* Just return the buffer size. */ - emit_data->output[emit_data->chan] = emit_data->args[0]; - return; - } + assert(return_type == TGSI_RETURN_TYPE_SINT || + return_type == TGSI_RETURN_TYPE_UINT); + + for (unsigned chan = 0; chan < 4; ++chan) { + LLVMValueRef chanv = LLVMConstInt(ctx->i32, chan, false); + LLVMValueRef value; + LLVMValueRef wa_value; - memcpy(&args, emit_data->args, sizeof(args)); /* ugly */ + value = LLVMBuildExtractElement(builder, result, chanv, ""); - args.opcode = ac_image_get_resinfo; - LLVMValueRef result = ac_build_image_opcode(&ctx->ac, &args); + if (return_type == TGSI_RETURN_TYPE_UINT) + wa_value = LLVMBuildFPToUI(builder, value, ctx->i32, ""); + else + wa_value = LLVMBuildFPToSI(builder, value, ctx->i32, ""); + wa_value = ac_to_float(&ctx->ac, wa_value); + value = LLVMBuildSelect(builder, wa, wa_value, value, ""); - emit_data->output[emit_data->chan] = fix_resinfo(ctx, target, result); + result = LLVMBuildInsertElement(builder, result, value, chanv, ""); + } + + return result; } -static void tex_fetch_args( - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) +static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); const struct tgsi_full_instruction *inst = emit_data->inst; unsigned opcode = inst->Instruction.Opcode; unsigned target = inst->Texture.Texture; - LLVMValueRef coords[5], derivs[6]; - LLVMValueRef address[16]; - unsigned num_coords = tgsi_util_get_texture_coord_dim(target); + struct ac_image_args args = {}; int ref_pos = tgsi_util_get_shadow_ref_src_index(target); - unsigned count = 0; unsigned chan; - unsigned num_deriv_channels = 0; bool has_offset = inst->Texture.NumOffsets > 0; - LLVMValueRef res_ptr, samp_ptr, fmask_ptr = 
NULL; - unsigned dmask = 0xf; + LLVMValueRef fmask_ptr = NULL; - tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr); + tex_fetch_ptrs(bld_base, emit_data, &args.resource, &args.sampler, &fmask_ptr); if (target == TGSI_TEXTURE_BUFFER) { - emit_data->dst_type = ctx->v4f32; - emit_data->args[0] = res_ptr; - emit_data->args[1] = ctx->i32_0; - emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X); - emit_data->arg_count = 3; + LLVMValueRef vindex = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X); + unsigned num_channels = + util_last_bit(inst->Dst[0].Register.WriteMask); + LLVMValueRef result = + ac_build_buffer_load_format(&ctx->ac, + args.resource, + vindex, + ctx->i32_0, + num_channels, 0, true); + emit_data->output[emit_data->chan] = + ac_build_expand_to_vec4(&ctx->ac, result, num_channels); return; } /* Fetch and project texture coordinates */ - coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); + args.coords[3] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_W); for (chan = 0; chan < 3; chan++) { - coords[chan] = lp_build_emit_fetch(bld_base, - emit_data->inst, 0, - chan); + args.coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan); if (opcode == TGSI_OPCODE_TXP) - coords[chan] = lp_build_emit_llvm_binary(bld_base, - TGSI_OPCODE_DIV, - coords[chan], - coords[3]); + args.coords[chan] = ac_build_fdiv(&ctx->ac, + args.coords[chan], args.coords[3]); } if (opcode == TGSI_OPCODE_TXP) - coords[3] = ctx->ac.f32_1; + args.coords[3] = ctx->ac.f32_1; /* Pack offsets. */ if (has_offset && @@ -1332,8 +1400,7 @@ static void tex_fetch_args( assert(inst->Texture.NumOffsets == 1); for (chan = 0; chan < 3; chan++) { - offset[chan] = lp_build_emit_fetch_texoffset(bld_base, - emit_data->inst, 0, chan); + offset[chan] = lp_build_emit_fetch_texoffset(bld_base, inst, 0, chan); offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan], LLVMConstInt(ctx->i32, 0x3f, 0), ""); if (chan) @@ -1343,14 +1410,14 @@ static void tex_fetch_args( pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], ""); pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], ""); - address[count++] = pack; + args.offset = pack; } /* Pack LOD bias value */ if (opcode == TGSI_OPCODE_TXB) - address[count++] = coords[3]; + args.bias = args.coords[3]; if (opcode == TGSI_OPCODE_TXB2) - address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); + args.bias = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); /* Pack depth comparison value */ if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) { @@ -1360,7 +1427,7 @@ static void tex_fetch_args( z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); } else { assert(ref_pos >= 0); - z = coords[ref_pos]; + z = args.coords[ref_pos]; } /* Section 8.23.1 (Depth Texture Comparison Mode) of the @@ -1372,12 +1439,14 @@ static void tex_fetch_args( * * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT, * so the depth comparison value isn't clamped for Z16 and - * Z24 anymore. Do it manually here. + * Z24 anymore. Do it manually here for GFX8-9; GFX10 has + * an explicitly clamped 32-bit float format. 
*/ - if (ctx->screen->b.chip_class >= VI) { + if (ctx->screen->info.chip_class >= GFX8 && + ctx->screen->info.chip_class <= GFX9) { LLVMValueRef upgraded; LLVMValueRef clamped; - upgraded = LLVMBuildExtractElement(ctx->ac.builder, samp_ptr, + upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler, LLVMConstInt(ctx->i32, 3, false), ""); upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded, LLVMConstInt(ctx->i32, 29, false), ""); @@ -1386,7 +1455,7 @@ static void tex_fetch_args( z = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped, z, ""); } - address[count++] = z; + args.compare = z; } /* Pack user derivatives */ @@ -1397,7 +1466,6 @@ static void tex_fetch_args( case TGSI_TEXTURE_3D: num_src_deriv_channels = 3; num_dst_deriv_channels = 3; - num_deriv_channels = 3; break; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_SHADOW2D: @@ -1407,7 +1475,6 @@ static void tex_fetch_args( case TGSI_TEXTURE_SHADOW2D_ARRAY: num_src_deriv_channels = 2; num_dst_deriv_channels = 2; - num_deriv_channels = 2; break; case TGSI_TEXTURE_CUBE: case TGSI_TEXTURE_SHADOWCUBE: @@ -1416,7 +1483,6 @@ static void tex_fetch_args( /* Cube derivatives will be converted to 2D. */ num_src_deriv_channels = 3; num_dst_deriv_channels = 3; - num_deriv_channels = 2; break; case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: @@ -1425,12 +1491,10 @@ static void tex_fetch_args( num_src_deriv_channels = 1; /* 1D textures are allocated and used as 2D on GFX9. */ - if (ctx->screen->b.chip_class >= GFX9) { + if (ctx->screen->info.chip_class == GFX9) { num_dst_deriv_channels = 2; - num_deriv_channels = 2; } else { num_dst_deriv_channels = 1; - num_deriv_channels = 1; } break; default: @@ -1439,13 +1503,13 @@ static void tex_fetch_args( for (param = 0; param < 2; param++) { for (chan = 0; chan < num_src_deriv_channels; chan++) - derivs[param * num_dst_deriv_channels + chan] = + args.derivs[param * num_dst_deriv_channels + chan] = lp_build_emit_fetch(bld_base, inst, param+1, chan); /* Fill in the rest with zeros. */ for (chan = num_src_deriv_channels; chan < num_dst_deriv_channels; chan++) - derivs[param * num_dst_deriv_channels + chan] = + args.derivs[param * num_dst_deriv_channels + chan] = ctx->ac.f32_0; } } @@ -1459,30 +1523,17 @@ static void tex_fetch_args( target == TGSI_TEXTURE_CUBE_ARRAY || target == TGSI_TEXTURE_SHADOWCUBE_ARRAY, opcode == TGSI_OPCODE_LODQ, - coords, derivs); + args.coords, args.derivs); } else if (tgsi_is_array_sampler(target) && opcode != TGSI_OPCODE_TXF && opcode != TGSI_OPCODE_TXF_LZ && - ctx->screen->b.chip_class <= VI) { + ctx->screen->info.chip_class <= GFX8) { unsigned array_coord = target == TGSI_TEXTURE_1D_ARRAY ? 1 : 2; - coords[array_coord] = - ac_build_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, - &coords[array_coord], 1, 0); + args.coords[array_coord] = ac_build_round(&ctx->ac, args.coords[array_coord]); } - if (opcode == TGSI_OPCODE_TXD) - for (int i = 0; i < num_deriv_channels * 2; i++) - address[count++] = derivs[i]; - - /* Pack texture coordinates */ - address[count++] = coords[0]; - if (num_coords > 1) - address[count++] = coords[1]; - if (num_coords > 2) - address[count++] = coords[2]; - /* 1D textures are allocated and used as 2D on GFX9. */ - if (ctx->screen->b.chip_class >= GFX9) { + if (ctx->screen->info.chip_class == GFX9) { LLVMValueRef filler; /* Use 0.5, so that we don't sample the border color. 
*/ @@ -1494,118 +1545,48 @@ static void tex_fetch_args( if (target == TGSI_TEXTURE_1D || target == TGSI_TEXTURE_SHADOW1D) { - address[count++] = filler; + args.coords[1] = filler; } else if (target == TGSI_TEXTURE_1D_ARRAY || target == TGSI_TEXTURE_SHADOW1D_ARRAY) { - address[count] = address[count - 1]; - address[count - 1] = filler; - count++; + args.coords[2] = args.coords[1]; + args.coords[1] = filler; } } /* Pack LOD or sample index */ - if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXF) - address[count++] = coords[3]; + if (opcode == TGSI_OPCODE_TXL) + args.lod = args.coords[3]; else if (opcode == TGSI_OPCODE_TXL2) - address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); - - if (count > 16) { - assert(!"Cannot handle more than 16 texture address parameters"); - count = 16; + args.lod = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); + else if (opcode == TGSI_OPCODE_TXF) { + if (target == TGSI_TEXTURE_2D_MSAA) { + /* No LOD, but move sample index into the right place. */ + args.coords[2] = args.coords[3]; + } else if (target != TGSI_TEXTURE_2D_ARRAY_MSAA) { + args.lod = args.coords[3]; + } } - for (chan = 0; chan < count; chan++) - address[chan] = ac_to_integer(&ctx->ac, address[chan]); - - /* Adjust the sample index according to FMASK. - * - * For uncompressed MSAA surfaces, FMASK should return 0x76543210, - * which is the identity mapping. Each nibble says which physical sample - * should be fetched to get that sample. - * - * For example, 0x11111100 means there are only 2 samples stored and - * the second sample covers 3/4 of the pixel. When reading samples 0 - * and 1, return physical sample 0 (determined by the first two 0s - * in FMASK), otherwise return physical sample 1. - * - * The sample index should be adjusted as follows: - * sample_index = (fmask >> (sample_index * 4)) & 0xF; - */ - if (target == TGSI_TEXTURE_2D_MSAA || - target == TGSI_TEXTURE_2D_ARRAY_MSAA) { - struct lp_build_emit_data txf_emit_data = *emit_data; - LLVMValueRef txf_address[4]; - /* We only need .xy for non-arrays, and .xyz for arrays. */ - unsigned txf_count = target == TGSI_TEXTURE_2D_MSAA ? 2 : 3; - struct tgsi_full_instruction inst = {}; - - memcpy(txf_address, address, sizeof(txf_address)); - - /* Read FMASK using TXF_LZ. */ - inst.Instruction.Opcode = TGSI_OPCODE_TXF_LZ; - inst.Texture.Texture = target; - txf_emit_data.inst = &inst; - txf_emit_data.chan = 0; - set_tex_fetch_args(ctx, &txf_emit_data, - target, fmask_ptr, NULL, - txf_address, txf_count, 0xf); - build_tex_intrinsic(&tex_action, bld_base, &txf_emit_data); - - /* Initialize some constants. */ - LLVMValueRef four = LLVMConstInt(ctx->i32, 4, 0); - LLVMValueRef F = LLVMConstInt(ctx->i32, 0xF, 0); - - /* Apply the formula. 
*/ - LLVMValueRef fmask = - LLVMBuildExtractElement(ctx->ac.builder, - txf_emit_data.output[0], - ctx->i32_0, ""); - - unsigned sample_chan = txf_count; /* the sample index is last */ - - LLVMValueRef sample_index4 = - LLVMBuildMul(ctx->ac.builder, address[sample_chan], four, ""); - - LLVMValueRef shifted_fmask = - LLVMBuildLShr(ctx->ac.builder, fmask, sample_index4, ""); - - LLVMValueRef final_sample = - LLVMBuildAnd(ctx->ac.builder, shifted_fmask, F, ""); - - /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK - * resource descriptor is 0 (invalid), - */ - LLVMValueRef fmask_desc = - LLVMBuildBitCast(ctx->ac.builder, fmask_ptr, - ctx->v8i32, ""); - - LLVMValueRef fmask_word1 = - LLVMBuildExtractElement(ctx->ac.builder, fmask_desc, - ctx->i32_1, ""); - - LLVMValueRef word1_is_nonzero = - LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, - fmask_word1, ctx->i32_0, ""); - - /* Replace the MSAA sample index. */ - address[sample_chan] = - LLVMBuildSelect(ctx->ac.builder, word1_is_nonzero, - final_sample, address[sample_chan], ""); + if ((target == TGSI_TEXTURE_2D_MSAA || + target == TGSI_TEXTURE_2D_ARRAY_MSAA) && + !(ctx->screen->debug_flags & DBG(NO_FMASK))) { + ac_apply_fmask_to_sample(&ctx->ac, fmask_ptr, args.coords, + target == TGSI_TEXTURE_2D_ARRAY_MSAA); } if (opcode == TGSI_OPCODE_TXF || opcode == TGSI_OPCODE_TXF_LZ) { /* add tex offsets */ if (inst->Texture.NumOffsets) { - struct lp_build_context *uint_bld = &bld_base->uint_bld; const struct tgsi_texture_offset *off = inst->TexOffsets; assert(inst->Texture.NumOffsets == 1); switch (target) { case TGSI_TEXTURE_3D: - address[2] = lp_build_add(uint_bld, address[2], - ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleZ]); + args.coords[2] = + LLVMBuildAdd(ctx->ac.builder, args.coords[2], + ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleZ], ""); /* fall through */ case TGSI_TEXTURE_2D: case TGSI_TEXTURE_SHADOW2D: @@ -1613,17 +1594,17 @@ static void tex_fetch_args( case TGSI_TEXTURE_SHADOWRECT: case TGSI_TEXTURE_2D_ARRAY: case TGSI_TEXTURE_SHADOW2D_ARRAY: - address[1] = - lp_build_add(uint_bld, address[1], - ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleY]); + args.coords[1] = + LLVMBuildAdd(ctx->ac.builder, args.coords[1], + ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleY], ""); /* fall through */ case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: case TGSI_TEXTURE_1D_ARRAY: case TGSI_TEXTURE_SHADOW1D_ARRAY: - address[0] = - lp_build_add(uint_bld, address[0], - ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleX]); + args.coords[0] = + LLVMBuildAdd(ctx->ac.builder, args.coords[0], + ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleX], ""); break; /* texture offsets do not apply to other texture targets */ } @@ -1653,204 +1634,15 @@ static void tex_fetch_args( gather_comp = CLAMP(gather_comp, 0, 3); } - dmask = 1 << gather_comp; - } - - set_tex_fetch_args(ctx, emit_data, target, res_ptr, - samp_ptr, address, count, dmask); -} - -/* Gather4 should follow the same rules as bilinear filtering, but the hardware - * incorrectly forces nearest filtering if the texture format is integer. - * The only effect it has on Gather4, which always returns 4 texels for - * bilinear filtering, is that the final coordinates are off by 0.5 of - * the texel size. - * - * The workaround is to subtract 0.5 from the unnormalized coordinates, - * or (0.5 / size) from the normalized coordinates. 
- * - * However, cube textures with 8_8_8_8 data formats require a different - * workaround of overriding the num format to USCALED/SSCALED. This would lose - * precision in 32-bit data formats, so it needs to be applied dynamically at - * runtime. In this case, return an i1 value that indicates whether the - * descriptor was overridden (and hence a fixup of the sampler result is needed). - */ -static LLVMValueRef -si_lower_gather4_integer(struct si_shader_context *ctx, - struct ac_image_args *args, - unsigned target, - enum tgsi_return_type return_type) -{ - LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef wa_8888 = NULL; - LLVMValueRef coord = args->addr; - LLVMValueRef half_texel[2]; - /* Texture coordinates start after: - * {offset, bias, z-compare, derivatives} - * Only the offset and z-compare can occur here. - */ - unsigned coord_vgpr_index = (int)args->offset + (int)args->compare; - int c; - - assert(return_type == TGSI_RETURN_TYPE_SINT || - return_type == TGSI_RETURN_TYPE_UINT); - - if (target == TGSI_TEXTURE_CUBE || - target == TGSI_TEXTURE_CUBE_ARRAY) { - LLVMValueRef formats; - LLVMValueRef data_format; - LLVMValueRef wa_formats; - - formats = LLVMBuildExtractElement(builder, args->resource, ctx->i32_1, ""); - - data_format = LLVMBuildLShr(builder, formats, - LLVMConstInt(ctx->i32, 20, false), ""); - data_format = LLVMBuildAnd(builder, data_format, - LLVMConstInt(ctx->i32, (1u << 6) - 1, false), ""); - wa_8888 = LLVMBuildICmp( - builder, LLVMIntEQ, data_format, - LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), - ""); - - uint32_t wa_num_format = - return_type == TGSI_RETURN_TYPE_UINT ? - S_008F14_NUM_FORMAT_GFX6(V_008F14_IMG_NUM_FORMAT_USCALED) : - S_008F14_NUM_FORMAT_GFX6(V_008F14_IMG_NUM_FORMAT_SSCALED); - wa_formats = LLVMBuildAnd(builder, formats, - LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false), - ""); - wa_formats = LLVMBuildOr(builder, wa_formats, - LLVMConstInt(ctx->i32, wa_num_format, false), ""); - - formats = LLVMBuildSelect(builder, wa_8888, wa_formats, formats, ""); - args->resource = LLVMBuildInsertElement( - builder, args->resource, formats, ctx->i32_1, ""); - } - - if (target == TGSI_TEXTURE_RECT || - target == TGSI_TEXTURE_SHADOWRECT) { - assert(!wa_8888); - half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5); + args.dmask = 1 << gather_comp; } else { - struct tgsi_full_instruction txq_inst = {}; - struct lp_build_emit_data txq_emit_data = {}; - struct lp_build_if_state if_ctx; - - if (wa_8888) { - /* Skip the texture size query entirely if we don't need it. */ - lp_build_if(&if_ctx, &ctx->gallivm, LLVMBuildNot(builder, wa_8888, "")); - } - - /* Query the texture size. */ - txq_inst.Texture.Texture = target; - txq_emit_data.inst = &txq_inst; - txq_emit_data.dst_type = ctx->v4i32; - set_tex_fetch_args(ctx, &txq_emit_data, target, - args->resource, NULL, &ctx->i32_0, - 1, 0xf); - txq_emit(NULL, &ctx->bld_base, &txq_emit_data); - - /* Compute -0.5 / size. 
*/ - for (c = 0; c < 2; c++) { - half_texel[c] = - LLVMBuildExtractElement(builder, txq_emit_data.output[0], - LLVMConstInt(ctx->i32, c, 0), ""); - half_texel[c] = LLVMBuildUIToFP(builder, half_texel[c], ctx->f32, ""); - half_texel[c] = - lp_build_emit_llvm_unary(&ctx->bld_base, - TGSI_OPCODE_RCP, half_texel[c]); - half_texel[c] = LLVMBuildFMul(builder, half_texel[c], - LLVMConstReal(ctx->f32, -0.5), ""); - } - - if (wa_8888) { - lp_build_endif(&if_ctx); - - LLVMBasicBlockRef bb[2] = { if_ctx.true_block, if_ctx.entry_block }; - - for (c = 0; c < 2; c++) { - LLVMValueRef values[2] = { half_texel[c], ctx->ac.f32_0 }; - half_texel[c] = ac_build_phi(&ctx->ac, ctx->f32, 2, - values, bb); - } - } - } - - for (c = 0; c < 2; c++) { - LLVMValueRef tmp; - LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0); - - tmp = LLVMBuildExtractElement(builder, coord, index, ""); - tmp = ac_to_float(&ctx->ac, tmp); - tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], ""); - tmp = ac_to_integer(&ctx->ac, tmp); - coord = LLVMBuildInsertElement(builder, coord, tmp, index, ""); - } - - args->addr = coord; - - return wa_8888; -} - -/* The second half of the cube texture 8_8_8_8 integer workaround: adjust the - * result after the gather operation. - */ -static LLVMValueRef -si_fix_gather4_integer_result(struct si_shader_context *ctx, - LLVMValueRef result, - enum tgsi_return_type return_type, - LLVMValueRef wa) -{ - LLVMBuilderRef builder = ctx->ac.builder; - - assert(return_type == TGSI_RETURN_TYPE_SINT || - return_type == TGSI_RETURN_TYPE_UINT); - - for (unsigned chan = 0; chan < 4; ++chan) { - LLVMValueRef chanv = LLVMConstInt(ctx->i32, chan, false); - LLVMValueRef value; - LLVMValueRef wa_value; - - value = LLVMBuildExtractElement(builder, result, chanv, ""); - - if (return_type == TGSI_RETURN_TYPE_UINT) - wa_value = LLVMBuildFPToUI(builder, value, ctx->i32, ""); - else - wa_value = LLVMBuildFPToSI(builder, value, ctx->i32, ""); - wa_value = ac_to_float(&ctx->ac, wa_value); - value = LLVMBuildSelect(builder, wa, wa_value, value, ""); - - result = LLVMBuildInsertElement(builder, result, value, chanv, ""); - } - - return result; -} - -static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - const struct tgsi_full_instruction *inst = emit_data->inst; - struct ac_image_args args; - unsigned opcode = inst->Instruction.Opcode; - unsigned target = inst->Texture.Texture; - - if (target == TGSI_TEXTURE_BUFFER) { - emit_data->output[emit_data->chan] = - ac_build_buffer_load_format(&ctx->ac, - emit_data->args[0], - emit_data->args[2], - emit_data->args[1], - true); - return; + args.dmask = 0xf; } - memcpy(&args, emit_data->args, sizeof(args)); /* ugly */ - + args.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target); + args.unorm = target == TGSI_TEXTURE_RECT || + target == TGSI_TEXTURE_SHADOWRECT; args.opcode = ac_image_sample; - args.compare = tgsi_is_shadow_target(target); - args.offset = inst->Texture.NumOffsets > 0; switch (opcode) { case TGSI_OPCODE_TXF: @@ -1859,13 +1651,9 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, target == TGSI_TEXTURE_2D_MSAA || target == TGSI_TEXTURE_2D_ARRAY_MSAA ? 
ac_image_load : ac_image_load_mip; - args.compare = false; - args.offset = false; break; case TGSI_OPCODE_LODQ: args.opcode = ac_image_get_lod; - args.compare = false; - args.offset = false; break; case TGSI_OPCODE_TEX: case TGSI_OPCODE_TEX2: @@ -1879,14 +1667,11 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, case TGSI_OPCODE_TXB: case TGSI_OPCODE_TXB2: assert(ctx->type == PIPE_SHADER_FRAGMENT); - args.bias = true; break; case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXL2: - args.lod = true; break; case TGSI_OPCODE_TXD: - args.deriv = true; break; case TGSI_OPCODE_TG4: args.opcode = ac_image_gather4; @@ -1900,7 +1685,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, /* The hardware needs special lowering for Gather4 with integer formats. */ LLVMValueRef gather4_int_result_workaround = NULL; - if (ctx->screen->b.chip_class <= VI && + if (ctx->screen->info.chip_class <= GFX8 && opcode == TGSI_OPCODE_TG4) { assert(inst->Texture.ReturnType != TGSI_RETURN_TYPE_UNKNOWN); @@ -1912,8 +1697,8 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, } } - LLVMValueRef result = - ac_build_image_opcode(&ctx->ac, &args); + args.attributes = AC_FUNC_ATTR_READNONE; + LLVMValueRef result = ac_build_image_opcode(&ctx->ac, &args); if (gather4_int_result_workaround) { result = si_fix_gather4_integer_result(ctx, result, @@ -1930,85 +1715,140 @@ static void si_llvm_emit_txqs( struct lp_build_emit_data *emit_data) { struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef res, samples; - LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL; + LLVMValueRef rsrc; - tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr); + tex_fetch_ptrs(bld_base, emit_data, &rsrc, NULL, NULL); + rsrc = LLVMBuildBitCast(ctx->ac.builder, rsrc, ctx->v8i32, ""); + emit_data->output[emit_data->chan] = + ac_build_image_get_sample_count(&ctx->ac, rsrc); +} - /* Read the samples from the descriptor directly. */ - res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->v8i32, ""); - samples = LLVMBuildExtractElement(ctx->ac.builder, res, - LLVMConstInt(ctx->i32, 3, 0), ""); - samples = LLVMBuildLShr(ctx->ac.builder, samples, - LLVMConstInt(ctx->i32, 16, 0), ""); - samples = LLVMBuildAnd(ctx->ac.builder, samples, - LLVMConstInt(ctx->i32, 0xf, 0), ""); - samples = LLVMBuildShl(ctx->ac.builder, ctx->i32_1, - samples, ""); +static LLVMValueRef si_llvm_emit_fbfetch(struct si_shader_context *ctx) +{ + struct ac_image_args args = {}; + LLVMValueRef ptr, image, fmask; + + /* Ignore src0, because KHR_blend_func_extended disallows multiple render + * targets. + */ + + /* Load the image descriptor. */ + STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0); + ptr = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers); + ptr = LLVMBuildPointerCast(ctx->ac.builder, ptr, + ac_array_in_const32_addr_space(ctx->v8i32), ""); + image = ac_build_load_to_sgpr(&ctx->ac, ptr, + LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0 / 2, 0)); + + unsigned chan = 0; + + args.coords[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 0, 16); + + if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D) + args.coords[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 16, 16); + + /* Get the current render target layer index. 
*/ + if (ctx->shader->key.mono.u.ps.fbfetch_layered) + args.coords[chan++] = si_unpack_param(ctx, SI_PARAM_ANCILLARY, 16, 11); + + if (ctx->shader->key.mono.u.ps.fbfetch_msaa) + args.coords[chan++] = si_get_sample_id(ctx); + + if (ctx->shader->key.mono.u.ps.fbfetch_msaa && + !(ctx->screen->debug_flags & DBG(NO_FMASK))) { + fmask = ac_build_load_to_sgpr(&ctx->ac, ptr, + LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0)); + + ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords, + ctx->shader->key.mono.u.ps.fbfetch_layered); + } - emit_data->output[emit_data->chan] = samples; + args.opcode = ac_image_load; + args.resource = image; + args.dmask = 0xf; + args.attributes = AC_FUNC_ATTR_READNONE; + + if (ctx->shader->key.mono.u.ps.fbfetch_msaa) + args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? + ac_image_2darraymsaa : ac_image_2dmsaa; + else if (ctx->shader->key.mono.u.ps.fbfetch_is_1D) + args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? + ac_image_1darray : ac_image_1d; + else + args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? + ac_image_2darray : ac_image_2d; + + return ac_build_image_opcode(&ctx->ac, &args); } -static const struct lp_build_tgsi_action tex_action = { - .fetch_args = tex_fetch_args, - .emit = build_tex_intrinsic, -}; +static void si_tgsi_emit_fbfetch(const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context *bld_base, + struct lp_build_emit_data *emit_data) +{ + struct si_shader_context *ctx = si_shader_context(bld_base); + + emit_data->output[emit_data->chan] = si_llvm_emit_fbfetch(ctx); +} + +LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi) +{ + struct si_shader_context *ctx = si_shader_context_from_abi(abi); + + return si_llvm_emit_fbfetch(ctx); +} /** * Setup actions for TGSI memory opcode, including texture opcodes. 
*/ void si_shader_context_init_mem(struct si_shader_context *ctx) { - struct lp_build_tgsi_context *bld_base; - struct lp_build_tgsi_action tmpl = {}; - - bld_base = &ctx->bld_base; - - bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action; - bld_base->op_actions[TGSI_OPCODE_TEX_LZ] = tex_action; - bld_base->op_actions[TGSI_OPCODE_TEX2] = tex_action; - bld_base->op_actions[TGSI_OPCODE_TXB] = tex_action; - bld_base->op_actions[TGSI_OPCODE_TXB2] = tex_action; - bld_base->op_actions[TGSI_OPCODE_TXD] = tex_action; - bld_base->op_actions[TGSI_OPCODE_TXF] = tex_action; - bld_base->op_actions[TGSI_OPCODE_TXF_LZ] = tex_action; - bld_base->op_actions[TGSI_OPCODE_TXL] = tex_action; - bld_base->op_actions[TGSI_OPCODE_TXL2] = tex_action; - bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action; - bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = txq_fetch_args; - bld_base->op_actions[TGSI_OPCODE_TXQ].emit = txq_emit; - bld_base->op_actions[TGSI_OPCODE_TG4] = tex_action; - bld_base->op_actions[TGSI_OPCODE_LODQ] = tex_action; + struct lp_build_tgsi_context *bld_base = &ctx->bld_base; + + bld_base->op_actions[TGSI_OPCODE_TEX].emit = build_tex_intrinsic; + bld_base->op_actions[TGSI_OPCODE_TEX_LZ].emit = build_tex_intrinsic; + bld_base->op_actions[TGSI_OPCODE_TEX2].emit = build_tex_intrinsic; + bld_base->op_actions[TGSI_OPCODE_TXB].emit = build_tex_intrinsic; + bld_base->op_actions[TGSI_OPCODE_TXB2].emit = build_tex_intrinsic; + bld_base->op_actions[TGSI_OPCODE_TXD].emit = build_tex_intrinsic; + bld_base->op_actions[TGSI_OPCODE_TXF].emit = build_tex_intrinsic; + bld_base->op_actions[TGSI_OPCODE_TXF_LZ].emit = build_tex_intrinsic; + bld_base->op_actions[TGSI_OPCODE_TXL].emit = build_tex_intrinsic; + bld_base->op_actions[TGSI_OPCODE_TXL2].emit = build_tex_intrinsic; + bld_base->op_actions[TGSI_OPCODE_TXP].emit = build_tex_intrinsic; + bld_base->op_actions[TGSI_OPCODE_TXQ].emit = resq_emit; + bld_base->op_actions[TGSI_OPCODE_TG4].emit = build_tex_intrinsic; + bld_base->op_actions[TGSI_OPCODE_LODQ].emit = build_tex_intrinsic; bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs; - bld_base->op_actions[TGSI_OPCODE_LOAD].fetch_args = load_fetch_args; + bld_base->op_actions[TGSI_OPCODE_FBFETCH].emit = si_tgsi_emit_fbfetch; + bld_base->op_actions[TGSI_OPCODE_LOAD].emit = load_emit; - bld_base->op_actions[TGSI_OPCODE_STORE].fetch_args = store_fetch_args; bld_base->op_actions[TGSI_OPCODE_STORE].emit = store_emit; - bld_base->op_actions[TGSI_OPCODE_RESQ].fetch_args = resq_fetch_args; bld_base->op_actions[TGSI_OPCODE_RESQ].emit = resq_emit; - tmpl.fetch_args = atomic_fetch_args; - tmpl.emit = atomic_emit; - bld_base->op_actions[TGSI_OPCODE_ATOMUADD] = tmpl; + bld_base->op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit; bld_base->op_actions[TGSI_OPCODE_ATOMUADD].intr_name = "add"; - bld_base->op_actions[TGSI_OPCODE_ATOMXCHG] = tmpl; + bld_base->op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit; bld_base->op_actions[TGSI_OPCODE_ATOMXCHG].intr_name = "swap"; - bld_base->op_actions[TGSI_OPCODE_ATOMCAS] = tmpl; + bld_base->op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit; bld_base->op_actions[TGSI_OPCODE_ATOMCAS].intr_name = "cmpswap"; - bld_base->op_actions[TGSI_OPCODE_ATOMAND] = tmpl; + bld_base->op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit; bld_base->op_actions[TGSI_OPCODE_ATOMAND].intr_name = "and"; - bld_base->op_actions[TGSI_OPCODE_ATOMOR] = tmpl; + bld_base->op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit; bld_base->op_actions[TGSI_OPCODE_ATOMOR].intr_name = "or"; - 
bld_base->op_actions[TGSI_OPCODE_ATOMXOR] = tmpl; + bld_base->op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit; bld_base->op_actions[TGSI_OPCODE_ATOMXOR].intr_name = "xor"; - bld_base->op_actions[TGSI_OPCODE_ATOMUMIN] = tmpl; + bld_base->op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit; bld_base->op_actions[TGSI_OPCODE_ATOMUMIN].intr_name = "umin"; - bld_base->op_actions[TGSI_OPCODE_ATOMUMAX] = tmpl; + bld_base->op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit; bld_base->op_actions[TGSI_OPCODE_ATOMUMAX].intr_name = "umax"; - bld_base->op_actions[TGSI_OPCODE_ATOMIMIN] = tmpl; + bld_base->op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit; bld_base->op_actions[TGSI_OPCODE_ATOMIMIN].intr_name = "smin"; - bld_base->op_actions[TGSI_OPCODE_ATOMIMAX] = tmpl; + bld_base->op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit; bld_base->op_actions[TGSI_OPCODE_ATOMIMAX].intr_name = "smax"; + bld_base->op_actions[TGSI_OPCODE_ATOMINC_WRAP].emit = atomic_emit; + bld_base->op_actions[TGSI_OPCODE_ATOMINC_WRAP].intr_name = "inc"; + bld_base->op_actions[TGSI_OPCODE_ATOMDEC_WRAP].emit = atomic_emit; + bld_base->op_actions[TGSI_OPCODE_ATOMDEC_WRAP].intr_name = "dec"; }
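
Note on the manual depth-comparison clamp introduced above for GFX8-9: when TC-compatible HTILE promotes Z16/Z24 to Z32_FLOAT, the hardware no longer clamps the comparison value, so the shader clamps it itself whenever the sampler descriptor's "upgraded" bit is set (dword 3 shifted right by 29 in the code above). The following is only a plain-C sketch of that select(upgraded, clamp(z, 0, 1), z) sequence, outside of LLVM IR; clamp_depth_compare and samp_dword3 are illustrative names, not driver API, and the [0, 1] range is the usual depth-comparison clamp assumed here.

#include <stdint.h>
#include <stdbool.h>

/* Sketch only: clamp the depth-comparison reference value when the
 * sampler descriptor says the Z16/Z24 resource was upgraded to
 * Z32_FLOAT (bit extracted from dword 3, as in the code above). */
static float clamp_depth_compare(float z, uint32_t samp_dword3)
{
	bool upgraded = (samp_dword3 >> 29) & 1;

	if (upgraded) {
		if (z < 0.0f)
			z = 0.0f;
		else if (z > 1.0f)
			z = 1.0f;
	}
	return z;
}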
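
The open-coded FMASK handling removed above (now performed by ac_apply_fmask_to_sample) follows the remapping spelled out in the deleted comment: each nibble of the FMASK word names the physical sample to fetch, 0x76543210 is the identity mapping, and the index is left untouched when WORD1 of the FMASK descriptor is 0 (invalid). A standalone sketch of that remapping, assuming plain integer inputs rather than LLVM values; remap_fmask_sample is a hypothetical helper, not driver API.

#include <stdint.h>

/* sample_index = (fmask >> (sample_index * 4)) & 0xF, guarded by the
 * FMASK descriptor's WORD1 being non-zero, as in the code removed above. */
static uint32_t remap_fmask_sample(uint32_t fmask, uint32_t fmask_word1,
				   uint32_t sample_index)
{
	if (fmask_word1 == 0)
		return sample_index;	/* invalid FMASK: keep the index */

	return (fmask >> (sample_index * 4)) & 0xF;
}

With the example from the deleted comment, fmask = 0x11111100, logical samples 0 and 1 map to physical sample 0 and all higher indices map to physical sample 1.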
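
The Gather4 integer-format workaround above offsets the coordinates by half a texel: a constant -0.5 for the unnormalized RECT targets, and -0.5 / size (taken from a texture-size query) otherwise, which is then added to the x/y coordinates. A plain-C sketch under those assumptions; gather4_half_texel_fixup, width, height and unnormalized are illustrative names only, and the real code does this with LLVM IR and a resinfo query.

#include <stdbool.h>

/* Sketch of the half-texel correction: coords[] holds the x/y
 * coordinates (normalized, or unnormalized for RECT) that gather4
 * will use; the "-0.5 / size" step mirrors the code above. */
static void gather4_half_texel_fixup(float coords[2],
				     unsigned width, unsigned height,
				     bool unnormalized)
{
	float half_texel[2];

	if (unnormalized) {
		half_texel[0] = half_texel[1] = -0.5f;
	} else {
		half_texel[0] = -0.5f / (float)width;
		half_texel[1] = -0.5f / (float)height;
	}

	coords[0] += half_texel[0];
	coords[1] += half_texel[1];
}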