From: Marek Olšák Date: Wed, 15 Jan 2020 01:17:08 +0000 (-0500) Subject: radeonsi: move code for shader resources into si_shader_llvm_resources.c X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=commitdiff_plain;h=30b14ba67e998cbc12447adc6c8e185636b75390 radeonsi: move code for shader resources into si_shader_llvm_resources.c Reviewed-by: Timothy Arceri Part-of: --- diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources index 152e8c74689..73e544a1d5d 100644 --- a/src/gallium/drivers/radeonsi/Makefile.sources +++ b/src/gallium/drivers/radeonsi/Makefile.sources @@ -39,6 +39,7 @@ C_SOURCES := \ si_shader_llvm_build.c \ si_shader_llvm_gs.c \ si_shader_llvm_ps.c \ + si_shader_llvm_resources.c \ si_shader_llvm_tess.c \ si_shader_nir.c \ si_shaderlib_tgsi.c \ diff --git a/src/gallium/drivers/radeonsi/meson.build b/src/gallium/drivers/radeonsi/meson.build index 9bf63f57b2c..5a09c0a923b 100644 --- a/src/gallium/drivers/radeonsi/meson.build +++ b/src/gallium/drivers/radeonsi/meson.build @@ -54,6 +54,7 @@ files_libradeonsi = files( 'si_shader_llvm_build.c', 'si_shader_llvm_gs.c', 'si_shader_llvm_ps.c', + 'si_shader_llvm_resources.c', 'si_shader_llvm_tess.c', 'si_shader_nir.c', 'si_shaderlib_tgsi.c', diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index ab6751f44b4..add0ff1fcf0 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -492,81 +492,6 @@ void si_declare_compute_memory(struct si_shader_context *ctx) ctx->ac.lds = LLVMBuildBitCast(ctx->ac.builder, var, i8p, ""); } -static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *ctx) -{ - LLVMValueRef ptr = - ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers); - struct si_shader_selector *sel = ctx->shader->selector; - - /* Do the bounds checking with a descriptor, because - * doing computation and manual bounds checking of 64-bit - * addresses 
generates horrible VALU code with very high - * VGPR usage and very low SIMD occupancy. - */ - ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, ""); - - LLVMValueRef desc0, desc1; - desc0 = ptr; - desc1 = LLVMConstInt(ctx->i32, - S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0); - - uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); - - if (ctx->screen->info.chip_class >= GFX10) - rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) | - S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | - S_008F0C_RESOURCE_LEVEL(1); - else - rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); - - LLVMValueRef desc_elems[] = { - desc0, - desc1, - LLVMConstInt(ctx->i32, sel->info.constbuf0_num_slots * 16, 0), - LLVMConstInt(ctx->i32, rsrc3, false) - }; - - return ac_build_gather_values(&ctx->ac, desc_elems, 4); -} - -static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - struct si_shader_selector *sel = ctx->shader->selector; - - LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers); - - if (sel->info.const_buffers_declared == 1 && - sel->info.shader_buffers_declared == 0) { - return load_const_buffer_desc_fast_path(ctx); - } - - index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers); - index = LLVMBuildAdd(ctx->ac.builder, index, - LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), ""); - - return ac_build_load_to_sgpr(&ctx->ac, ptr, index); -} - -static LLVMValueRef -load_ssbo(struct ac_shader_abi *abi, LLVMValueRef index, bool write) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - LLVMValueRef rsrc_ptr = ac_get_arg(&ctx->ac, - ctx->const_and_shader_buffers); - - index = si_llvm_bound_index(ctx, index, ctx->num_shader_buffers); - 
index = LLVMBuildSub(ctx->ac.builder, - LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS - 1, 0), - index, ""); - - return ac_build_load_to_sgpr(&ctx->ac, rsrc_ptr, index); -} - /* Initialize arguments for the shader export intrinsic */ static void si_llvm_init_vs_export_args(struct si_shader_context *ctx, LLVMValueRef *values, @@ -2418,6 +2343,8 @@ static bool si_build_main_function(struct si_shader_context *ctx, struct si_shader *shader = ctx->shader; struct si_shader_selector *sel = shader->selector; + si_llvm_init_resource_callbacks(ctx); + switch (ctx->type) { case PIPE_SHADER_VERTEX: if (shader->key.as_ls) @@ -2459,9 +2386,6 @@ static bool si_build_main_function(struct si_shader_context *ctx, return false; } - ctx->abi.load_ubo = load_ubo; - ctx->abi.load_ssbo = load_ssbo; - si_create_function(ctx); if (ctx->shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY) diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 8279a6826b2..f3b02f26f85 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -211,11 +211,6 @@ unsigned si_llvm_compile(LLVMModuleRef M, struct si_shader_binary *binary, struct ac_llvm_compiler *compiler, struct pipe_debug_callback *debug, bool less_optimized, unsigned wave_size); - -LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx, - LLVMValueRef index, - unsigned num); - void si_llvm_context_init(struct si_shader_context *ctx, struct si_screen *sscreen, struct ac_llvm_compiler *compiler, @@ -245,13 +240,6 @@ LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, bool load_input); bool si_is_merged_shader(struct si_shader_context *ctx); LLVMValueRef si_get_sample_id(struct si_shader_context *ctx); -LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, - LLVMValueRef list, LLVMValueRef index, - enum ac_descriptor_type type); -LLVMValueRef si_load_image_desc(struct si_shader_context 
*ctx, - LLVMValueRef list, LLVMValueRef index, - enum ac_descriptor_type desc_type, - bool uses_store, bool bindless); LLVMValueRef si_buffer_load_const(struct si_shader_context *ctx, LLVMValueRef resource, LLVMValueRef offset); void si_llvm_build_ret(struct si_shader_context *ctx, LLVMValueRef ret); @@ -358,4 +346,7 @@ void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader *shader); void si_llvm_init_ps_callbacks(struct si_shader_context *ctx); +/* si_shader_llvm_resources.c */ +void si_llvm_init_resource_callbacks(struct si_shader_context *ctx); + #endif diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_build.c b/src/gallium/drivers/radeonsi/si_shader_llvm_build.c index 8d0a841973c..722faf45d15 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_build.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_build.c @@ -29,136 +29,6 @@ #include "sid.h" #include "ac_llvm_util.h" -/** - * Return a value that is equal to the given i32 \p index if it lies in [0,num) - * or an undefined value in the same interval otherwise. - */ -LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx, - LLVMValueRef index, - unsigned num) -{ - LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0); - LLVMValueRef cc; - - if (util_is_power_of_two_or_zero(num)) { - index = LLVMBuildAnd(builder, index, c_max, ""); - } else { - /* In theory, this MAX pattern should result in code that is - * as good as the bit-wise AND above. - * - * In practice, LLVM generates worse code (at the time of - * writing), because its value tracking is not strong enough. - */ - cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, ""); - index = LLVMBuildSelect(builder, cc, index, c_max, ""); - } - - return index; -} - -/** - * Given a 256-bit resource descriptor, force the DCC enable bit to off. - * - * At least on Tonga, executing image stores on images with DCC enabled and - * non-trivial can eventually lead to lockups. 
This can occur when an - * application binds an image as read-only but then uses a shader that writes - * to it. The OpenGL spec allows almost arbitrarily bad behavior (including - * program termination) in this case, but it doesn't cost much to be a bit - * nicer: disabling DCC in the shader still leads to undefined results but - * avoids the lockup. - */ -static LLVMValueRef force_dcc_off(struct si_shader_context *ctx, - LLVMValueRef rsrc) -{ - if (ctx->screen->info.chip_class <= GFX7) { - return rsrc; - } else { - LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0); - LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0); - LLVMValueRef tmp; - - tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, ""); - tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, ""); - return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, ""); - } -} - -/* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should - * adjust "index" to point to FMASK. */ -LLVMValueRef si_load_image_desc(struct si_shader_context *ctx, - LLVMValueRef list, LLVMValueRef index, - enum ac_descriptor_type desc_type, - bool uses_store, bool bindless) -{ - LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef rsrc; - - if (desc_type == AC_DESC_BUFFER) { - index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0), - ctx->i32_1); - list = LLVMBuildPointerCast(builder, list, - ac_array_in_const32_addr_space(ctx->v4i32), ""); - } else { - assert(desc_type == AC_DESC_IMAGE || - desc_type == AC_DESC_FMASK); - } - - if (bindless) - rsrc = ac_build_load_to_sgpr_uint_wraparound(&ctx->ac, list, index); - else - rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index); - - if (desc_type == AC_DESC_IMAGE && uses_store) - rsrc = force_dcc_off(ctx, rsrc); - return rsrc; -} - -/** - * Load an image view, fmask view. or sampler state descriptor. 
- */ -LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, - LLVMValueRef list, LLVMValueRef index, - enum ac_descriptor_type type) -{ - LLVMBuilderRef builder = ctx->ac.builder; - - switch (type) { - case AC_DESC_IMAGE: - /* The image is at [0:7]. */ - index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), ""); - break; - case AC_DESC_BUFFER: - /* The buffer is in [4:7]. */ - index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0), - ctx->i32_1); - list = LLVMBuildPointerCast(builder, list, - ac_array_in_const32_addr_space(ctx->v4i32), ""); - break; - case AC_DESC_FMASK: - /* The FMASK is at [8:15]. */ - index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0), - ctx->i32_1); - break; - case AC_DESC_SAMPLER: - /* The sampler state is at [12:15]. */ - index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0), - LLVMConstInt(ctx->i32, 3, 0)); - list = LLVMBuildPointerCast(builder, list, - ac_array_in_const32_addr_space(ctx->v4i32), ""); - break; - case AC_DESC_PLANE_0: - case AC_DESC_PLANE_1: - case AC_DESC_PLANE_2: - /* Only used for the multiplane image support for Vulkan. Should - * never be reached in radeonsi. - */ - unreachable("Plane descriptor requested in radeonsi."); - } - - return ac_build_load_to_sgpr(&ctx->ac, list, index); -} - /** * Load a dword from a constant buffer. */ diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_resources.c b/src/gallium/drivers/radeonsi/si_shader_llvm_resources.c new file mode 100644 index 00000000000..ba23ddb0aa7 --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_resources.c @@ -0,0 +1,320 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "si_shader_internal.h" +#include "si_pipe.h" +#include "sid.h" + +/** + * Return a value that is equal to the given i32 \p index if it lies in [0,num) + * or an undefined value in the same interval otherwise. + */ +static LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx, + LLVMValueRef index, + unsigned num) +{ + LLVMBuilderRef builder = ctx->ac.builder; + LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0); + LLVMValueRef cc; + + if (util_is_power_of_two_or_zero(num)) { + index = LLVMBuildAnd(builder, index, c_max, ""); + } else { + /* In theory, this MAX pattern should result in code that is + * as good as the bit-wise AND above. + * + * In practice, LLVM generates worse code (at the time of + * writing), because its value tracking is not strong enough. 
+ */ + cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, ""); + index = LLVMBuildSelect(builder, cc, index, c_max, ""); + } + + return index; +} + +static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *ctx) +{ + LLVMValueRef ptr = + ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers); + struct si_shader_selector *sel = ctx->shader->selector; + + /* Do the bounds checking with a descriptor, because + * doing computation and manual bounds checking of 64-bit + * addresses generates horrible VALU code with very high + * VGPR usage and very low SIMD occupancy. + */ + ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, ""); + + LLVMValueRef desc0, desc1; + desc0 = ptr; + desc1 = LLVMConstInt(ctx->i32, + S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0); + + uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); + + if (ctx->screen->info.chip_class >= GFX10) + rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) | + S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | + S_008F0C_RESOURCE_LEVEL(1); + else + rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + + LLVMValueRef desc_elems[] = { + desc0, + desc1, + LLVMConstInt(ctx->i32, sel->info.constbuf0_num_slots * 16, 0), + LLVMConstInt(ctx->i32, rsrc3, false) + }; + + return ac_build_gather_values(&ctx->ac, desc_elems, 4); +} + +static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index) +{ + struct si_shader_context *ctx = si_shader_context_from_abi(abi); + struct si_shader_selector *sel = ctx->shader->selector; + + LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers); + + if (sel->info.const_buffers_declared == 1 && + sel->info.shader_buffers_declared == 0) { + return load_const_buffer_desc_fast_path(ctx); + } + + index = si_llvm_bound_index(ctx, index, 
ctx->num_const_buffers); + index = LLVMBuildAdd(ctx->ac.builder, index, + LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), ""); + + return ac_build_load_to_sgpr(&ctx->ac, ptr, index); +} + +static LLVMValueRef +load_ssbo(struct ac_shader_abi *abi, LLVMValueRef index, bool write) +{ + struct si_shader_context *ctx = si_shader_context_from_abi(abi); + LLVMValueRef rsrc_ptr = ac_get_arg(&ctx->ac, + ctx->const_and_shader_buffers); + + index = si_llvm_bound_index(ctx, index, ctx->num_shader_buffers); + index = LLVMBuildSub(ctx->ac.builder, + LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS - 1, 0), + index, ""); + + return ac_build_load_to_sgpr(&ctx->ac, rsrc_ptr, index); +} + +/** + * Given a 256-bit resource descriptor, force the DCC enable bit to off. + * + * At least on Tonga, executing image stores on images with DCC enabled and + * non-trivial can eventually lead to lockups. This can occur when an + * application binds an image as read-only but then uses a shader that writes + * to it. The OpenGL spec allows almost arbitrarily bad behavior (including + * program termination) in this case, but it doesn't cost much to be a bit + * nicer: disabling DCC in the shader still leads to undefined results but + * avoids the lockup. + */ +static LLVMValueRef force_dcc_off(struct si_shader_context *ctx, + LLVMValueRef rsrc) +{ + if (ctx->screen->info.chip_class <= GFX7) { + return rsrc; + } else { + LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0); + LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0); + LLVMValueRef tmp; + + tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, ""); + tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, ""); + return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, ""); + } +} + +/* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should + * adjust "index" to point to FMASK. 
*/ +static LLVMValueRef si_load_image_desc(struct si_shader_context *ctx, + LLVMValueRef list, LLVMValueRef index, + enum ac_descriptor_type desc_type, + bool uses_store, bool bindless) +{ + LLVMBuilderRef builder = ctx->ac.builder; + LLVMValueRef rsrc; + + if (desc_type == AC_DESC_BUFFER) { + index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0), + ctx->i32_1); + list = LLVMBuildPointerCast(builder, list, + ac_array_in_const32_addr_space(ctx->v4i32), ""); + } else { + assert(desc_type == AC_DESC_IMAGE || + desc_type == AC_DESC_FMASK); + } + + if (bindless) + rsrc = ac_build_load_to_sgpr_uint_wraparound(&ctx->ac, list, index); + else + rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index); + + if (desc_type == AC_DESC_IMAGE && uses_store) + rsrc = force_dcc_off(ctx, rsrc); + return rsrc; +} + +/** + * Load an image view, fmask view, or sampler state descriptor. + */ +static LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, + LLVMValueRef list, LLVMValueRef index, + enum ac_descriptor_type type) +{ + LLVMBuilderRef builder = ctx->ac.builder; + + switch (type) { + case AC_DESC_IMAGE: + /* The image is at [0:7]. */ + index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), ""); + break; + case AC_DESC_BUFFER: + /* The buffer is in [4:7]. */ + index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0), + ctx->i32_1); + list = LLVMBuildPointerCast(builder, list, + ac_array_in_const32_addr_space(ctx->v4i32), ""); + break; + case AC_DESC_FMASK: + /* The FMASK is at [8:15]. */ + index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0), + ctx->i32_1); + break; + case AC_DESC_SAMPLER: + /* The sampler state is at [12:15]. 
*/ + index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0), + LLVMConstInt(ctx->i32, 3, 0)); + list = LLVMBuildPointerCast(builder, list, + ac_array_in_const32_addr_space(ctx->v4i32), ""); + break; + case AC_DESC_PLANE_0: + case AC_DESC_PLANE_1: + case AC_DESC_PLANE_2: + /* Only used for the multiplane image support for Vulkan. Should + * never be reached in radeonsi. + */ + unreachable("Plane descriptor requested in radeonsi."); + } + + return ac_build_load_to_sgpr(&ctx->ac, list, index); +} + +static LLVMValueRef +si_nir_load_sampler_desc(struct ac_shader_abi *abi, + unsigned descriptor_set, unsigned base_index, + unsigned constant_index, LLVMValueRef dynamic_index, + enum ac_descriptor_type desc_type, bool image, + bool write, bool bindless) +{ + struct si_shader_context *ctx = si_shader_context_from_abi(abi); + LLVMBuilderRef builder = ctx->ac.builder; + unsigned const_index = base_index + constant_index; + + assert(!descriptor_set); + assert(desc_type <= AC_DESC_BUFFER); + + if (bindless) { + LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->bindless_samplers_and_images); + + /* dynamic_index is the bindless handle */ + if (image) { + /* Bindless image descriptors use 16-dword slots. */ + dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index, + LLVMConstInt(ctx->i64, 2, 0), ""); + /* FMASK is right after the image. */ + if (desc_type == AC_DESC_FMASK) { + dynamic_index = LLVMBuildAdd(ctx->ac.builder, dynamic_index, + ctx->i32_1, ""); + } + + return si_load_image_desc(ctx, list, dynamic_index, desc_type, + write, true); + } + + /* Since bindless handle arithmetic can contain an unsigned integer + * wraparound and si_load_sampler_desc assumes there isn't any, + * use GEP without "inbounds" (inside ac_build_pointer_add) + * to prevent incorrect code generation and hangs. 
+ */ + dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index, + LLVMConstInt(ctx->i64, 2, 0), ""); + list = ac_build_pointer_add(&ctx->ac, list, dynamic_index); + return si_load_sampler_desc(ctx, list, ctx->i32_0, desc_type); + } + + unsigned num_slots = image ? ctx->num_images : ctx->num_samplers; + assert(const_index < num_slots || dynamic_index); + + LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->samplers_and_images); + LLVMValueRef index = LLVMConstInt(ctx->ac.i32, const_index, false); + + if (dynamic_index) { + index = LLVMBuildAdd(builder, index, dynamic_index, ""); + + /* From the GL_ARB_shader_image_load_store extension spec: + * + * If a shader performs an image load, store, or atomic + * operation using an image variable declared as an array, + * and if the index used to select an individual element is + * negative or greater than or equal to the size of the + * array, the results of the operation are undefined but may + * not lead to termination. + */ + index = si_llvm_bound_index(ctx, index, num_slots); + } + + if (image) { + /* FMASKs are separate from images. 
*/ + if (desc_type == AC_DESC_FMASK) { + index = LLVMBuildAdd(ctx->ac.builder, index, + LLVMConstInt(ctx->i32, SI_NUM_IMAGES, 0), ""); + } + index = LLVMBuildSub(ctx->ac.builder, + LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS - 1, 0), + index, ""); + return si_load_image_desc(ctx, list, index, desc_type, write, false); + } + + index = LLVMBuildAdd(ctx->ac.builder, index, + LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS / 2, 0), ""); + return si_load_sampler_desc(ctx, list, index, desc_type); +} + +void si_llvm_init_resource_callbacks(struct si_shader_context *ctx) +{ + ctx->abi.load_ubo = load_ubo; + ctx->abi.load_ssbo = load_ssbo; + ctx->abi.load_sampler_desc = si_nir_load_sampler_desc; +} diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index f1969bf4491..bfbfc1aeec1 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -1005,87 +1005,6 @@ static void declare_nir_input_vs(struct si_shader_context *ctx, si_llvm_load_input_vs(ctx, input_index, out); } -static LLVMValueRef -si_nir_load_sampler_desc(struct ac_shader_abi *abi, - unsigned descriptor_set, unsigned base_index, - unsigned constant_index, LLVMValueRef dynamic_index, - enum ac_descriptor_type desc_type, bool image, - bool write, bool bindless) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - LLVMBuilderRef builder = ctx->ac.builder; - unsigned const_index = base_index + constant_index; - - assert(!descriptor_set); - assert(desc_type <= AC_DESC_BUFFER); - - if (bindless) { - LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->bindless_samplers_and_images); - - /* dynamic_index is the bindless handle */ - if (image) { - /* Bindless image descriptors use 16-dword slots. */ - dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index, - LLVMConstInt(ctx->i64, 2, 0), ""); - /* FMASK is right after the image. 
*/ - if (desc_type == AC_DESC_FMASK) { - dynamic_index = LLVMBuildAdd(ctx->ac.builder, dynamic_index, - ctx->i32_1, ""); - } - - return si_load_image_desc(ctx, list, dynamic_index, desc_type, - write, true); - } - - /* Since bindless handle arithmetic can contain an unsigned integer - * wraparound and si_load_sampler_desc assumes there isn't any, - * use GEP without "inbounds" (inside ac_build_pointer_add) - * to prevent incorrect code generation and hangs. - */ - dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index, - LLVMConstInt(ctx->i64, 2, 0), ""); - list = ac_build_pointer_add(&ctx->ac, list, dynamic_index); - return si_load_sampler_desc(ctx, list, ctx->i32_0, desc_type); - } - - unsigned num_slots = image ? ctx->num_images : ctx->num_samplers; - assert(const_index < num_slots || dynamic_index); - - LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->samplers_and_images); - LLVMValueRef index = LLVMConstInt(ctx->ac.i32, const_index, false); - - if (dynamic_index) { - index = LLVMBuildAdd(builder, index, dynamic_index, ""); - - /* From the GL_ARB_shader_image_load_store extension spec: - * - * If a shader performs an image load, store, or atomic - * operation using an image variable declared as an array, - * and if the index used to select an individual element is - * negative or greater than or equal to the size of the - * array, the results of the operation are undefined but may - * not lead to termination. - */ - index = si_llvm_bound_index(ctx, index, num_slots); - } - - if (image) { - /* FMASKs are separate from images. 
*/ - if (desc_type == AC_DESC_FMASK) { - index = LLVMBuildAdd(ctx->ac.builder, index, - LLVMConstInt(ctx->i32, SI_NUM_IMAGES, 0), ""); - } - index = LLVMBuildSub(ctx->ac.builder, - LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS - 1, 0), - index, ""); - return si_load_image_desc(ctx, list, index, desc_type, write, false); - } - - index = LLVMBuildAdd(ctx->ac.builder, index, - LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS / 2, 0), ""); - return si_load_sampler_desc(ctx, list, index, desc_type); -} - static void bitcast_inputs(struct si_shader_context *ctx, LLVMValueRef data[4], unsigned input_idx) @@ -1174,7 +1093,6 @@ bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir) } ctx->abi.inputs = &ctx->inputs[0]; - ctx->abi.load_sampler_desc = si_nir_load_sampler_desc; ctx->abi.clamp_shadow_reference = true; ctx->abi.robust_buffer_access = true;