si_shader_llvm_build.c \
si_shader_llvm_gs.c \
si_shader_llvm_ps.c \
+ si_shader_llvm_resources.c \
si_shader_llvm_tess.c \
si_shader_nir.c \
si_shaderlib_tgsi.c \
'si_shader_llvm_build.c',
'si_shader_llvm_gs.c',
'si_shader_llvm_ps.c',
+ 'si_shader_llvm_resources.c',
'si_shader_llvm_tess.c',
'si_shader_nir.c',
'si_shaderlib_tgsi.c',
ctx->ac.lds = LLVMBuildBitCast(ctx->ac.builder, var, i8p, "");
}
-static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *ctx)
-{
- LLVMValueRef ptr =
- ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers);
- struct si_shader_selector *sel = ctx->shader->selector;
-
- /* Do the bounds checking with a descriptor, because
- * doing computation and manual bounds checking of 64-bit
- * addresses generates horrible VALU code with very high
- * VGPR usage and very low SIMD occupancy.
- */
- ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, "");
-
- LLVMValueRef desc0, desc1;
- desc0 = ptr;
- desc1 = LLVMConstInt(ctx->i32,
- S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
-
- uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-
- if (ctx->screen->info.chip_class >= GFX10)
- rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
- S_008F0C_RESOURCE_LEVEL(1);
- else
- rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
-
- LLVMValueRef desc_elems[] = {
- desc0,
- desc1,
- LLVMConstInt(ctx->i32, sel->info.constbuf0_num_slots * 16, 0),
- LLVMConstInt(ctx->i32, rsrc3, false)
- };
-
- return ac_build_gather_values(&ctx->ac, desc_elems, 4);
-}
-
-static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
-{
- struct si_shader_context *ctx = si_shader_context_from_abi(abi);
- struct si_shader_selector *sel = ctx->shader->selector;
-
- LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers);
-
- if (sel->info.const_buffers_declared == 1 &&
- sel->info.shader_buffers_declared == 0) {
- return load_const_buffer_desc_fast_path(ctx);
- }
-
- index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
- index = LLVMBuildAdd(ctx->ac.builder, index,
- LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
-
- return ac_build_load_to_sgpr(&ctx->ac, ptr, index);
-}
-
-static LLVMValueRef
-load_ssbo(struct ac_shader_abi *abi, LLVMValueRef index, bool write)
-{
- struct si_shader_context *ctx = si_shader_context_from_abi(abi);
- LLVMValueRef rsrc_ptr = ac_get_arg(&ctx->ac,
- ctx->const_and_shader_buffers);
-
- index = si_llvm_bound_index(ctx, index, ctx->num_shader_buffers);
- index = LLVMBuildSub(ctx->ac.builder,
- LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS - 1, 0),
- index, "");
-
- return ac_build_load_to_sgpr(&ctx->ac, rsrc_ptr, index);
-}
-
/* Initialize arguments for the shader export intrinsic */
static void si_llvm_init_vs_export_args(struct si_shader_context *ctx,
LLVMValueRef *values,
struct si_shader *shader = ctx->shader;
struct si_shader_selector *sel = shader->selector;
+ si_llvm_init_resource_callbacks(ctx);
+
switch (ctx->type) {
case PIPE_SHADER_VERTEX:
if (shader->key.as_ls)
return false;
}
- ctx->abi.load_ubo = load_ubo;
- ctx->abi.load_ssbo = load_ssbo;
-
si_create_function(ctx);
if (ctx->shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY)
struct ac_llvm_compiler *compiler,
struct pipe_debug_callback *debug,
bool less_optimized, unsigned wave_size);
-
-LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
- LLVMValueRef index,
- unsigned num);
-
void si_llvm_context_init(struct si_shader_context *ctx,
struct si_screen *sscreen,
struct ac_llvm_compiler *compiler,
bool load_input);
bool si_is_merged_shader(struct si_shader_context *ctx);
LLVMValueRef si_get_sample_id(struct si_shader_context *ctx);
-LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
- LLVMValueRef list, LLVMValueRef index,
- enum ac_descriptor_type type);
-LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
- LLVMValueRef list, LLVMValueRef index,
- enum ac_descriptor_type desc_type,
- bool uses_store, bool bindless);
LLVMValueRef si_buffer_load_const(struct si_shader_context *ctx,
LLVMValueRef resource, LLVMValueRef offset);
void si_llvm_build_ret(struct si_shader_context *ctx, LLVMValueRef ret);
struct si_shader *shader);
void si_llvm_init_ps_callbacks(struct si_shader_context *ctx);
+/* si_shader_llvm_resources.c */
+void si_llvm_init_resource_callbacks(struct si_shader_context *ctx);
+
#endif
#include "sid.h"
#include "ac_llvm_util.h"
-/**
- * Return a value that is equal to the given i32 \p index if it lies in [0,num)
- * or an undefined value in the same interval otherwise.
- */
-LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
- LLVMValueRef index,
- unsigned num)
-{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
- LLVMValueRef cc;
-
- if (util_is_power_of_two_or_zero(num)) {
- index = LLVMBuildAnd(builder, index, c_max, "");
- } else {
- /* In theory, this MAX pattern should result in code that is
- * as good as the bit-wise AND above.
- *
- * In practice, LLVM generates worse code (at the time of
- * writing), because its value tracking is not strong enough.
- */
- cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
- index = LLVMBuildSelect(builder, cc, index, c_max, "");
- }
-
- return index;
-}
-
-/**
- * Given a 256-bit resource descriptor, force the DCC enable bit to off.
- *
- * At least on Tonga, executing image stores on images with DCC enabled and
- * non-trivial can eventually lead to lockups. This can occur when an
- * application binds an image as read-only but then uses a shader that writes
- * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
- * program termination) in this case, but it doesn't cost much to be a bit
- * nicer: disabling DCC in the shader still leads to undefined results but
- * avoids the lockup.
- */
-static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
- LLVMValueRef rsrc)
-{
- if (ctx->screen->info.chip_class <= GFX7) {
- return rsrc;
- } else {
- LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
- LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0);
- LLVMValueRef tmp;
-
- tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, "");
- tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, "");
- return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, "");
- }
-}
-
-/* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should
- * adjust "index" to point to FMASK. */
-LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
- LLVMValueRef list, LLVMValueRef index,
- enum ac_descriptor_type desc_type,
- bool uses_store, bool bindless)
-{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef rsrc;
-
- if (desc_type == AC_DESC_BUFFER) {
- index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
- ctx->i32_1);
- list = LLVMBuildPointerCast(builder, list,
- ac_array_in_const32_addr_space(ctx->v4i32), "");
- } else {
- assert(desc_type == AC_DESC_IMAGE ||
- desc_type == AC_DESC_FMASK);
- }
-
- if (bindless)
- rsrc = ac_build_load_to_sgpr_uint_wraparound(&ctx->ac, list, index);
- else
- rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
-
- if (desc_type == AC_DESC_IMAGE && uses_store)
- rsrc = force_dcc_off(ctx, rsrc);
- return rsrc;
-}
-
-/**
- * Load an image view, fmask view. or sampler state descriptor.
- */
-LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
- LLVMValueRef list, LLVMValueRef index,
- enum ac_descriptor_type type)
-{
- LLVMBuilderRef builder = ctx->ac.builder;
-
- switch (type) {
- case AC_DESC_IMAGE:
- /* The image is at [0:7]. */
- index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
- break;
- case AC_DESC_BUFFER:
- /* The buffer is in [4:7]. */
- index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
- ctx->i32_1);
- list = LLVMBuildPointerCast(builder, list,
- ac_array_in_const32_addr_space(ctx->v4i32), "");
- break;
- case AC_DESC_FMASK:
- /* The FMASK is at [8:15]. */
- index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
- ctx->i32_1);
- break;
- case AC_DESC_SAMPLER:
- /* The sampler state is at [12:15]. */
- index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
- LLVMConstInt(ctx->i32, 3, 0));
- list = LLVMBuildPointerCast(builder, list,
- ac_array_in_const32_addr_space(ctx->v4i32), "");
- break;
- case AC_DESC_PLANE_0:
- case AC_DESC_PLANE_1:
- case AC_DESC_PLANE_2:
- /* Only used for the multiplane image support for Vulkan. Should
- * never be reached in radeonsi.
- */
- unreachable("Plane descriptor requested in radeonsi.");
- }
-
- return ac_build_load_to_sgpr(&ctx->ac, list, index);
-}
-
/**
* Load a dword from a constant buffer.
*/
--- /dev/null
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "si_shader_internal.h"
+#include "si_pipe.h"
+#include "sid.h"
+
+/**
+ * Return a value that is equal to the given i32 \p index if it lies in [0,num)
+ * or an undefined value in the same interval otherwise.
+ */
+static LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
+ LLVMValueRef index,
+ unsigned num)
+{
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
+ LLVMValueRef cc;
+
+ if (util_is_power_of_two_or_zero(num)) {
+ index = LLVMBuildAnd(builder, index, c_max, "");
+ } else {
+ /* In theory, this MAX pattern should result in code that is
+ * as good as the bit-wise AND above.
+ *
+ * In practice, LLVM generates worse code (at the time of
+ * writing), because its value tracking is not strong enough.
+ */
+ cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
+ index = LLVMBuildSelect(builder, cc, index, c_max, "");
+ }
+
+ return index;
+}
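+
+/* A scalar sketch of the clamp built above, assuming a 32-bit unsigned
+ * index (MIN2 as in util/macros.h):
+ *
+ *    if (num is a power of two or zero)
+ *       index &= num - 1;             // e.g. num = 16: index & 15
+ *    else
+ *       index = MIN2(index, num - 1); // e.g. num = 6: min(index, 5)
+ *
+ * Either way an out-of-range index lands somewhere in [0, num), which is
+ * all that the callers below rely on.
+ */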
+
+static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *ctx)
+{
+ LLVMValueRef ptr =
+ ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers);
+ struct si_shader_selector *sel = ctx->shader->selector;
+
+ /* Do the bounds checking with a descriptor, because
+ * doing computation and manual bounds checking of 64-bit
+ * addresses generates horrible VALU code with very high
+ * VGPR usage and very low SIMD occupancy.
+ */
+ ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, "");
+
+ LLVMValueRef desc0, desc1;
+ desc0 = ptr;
+ desc1 = LLVMConstInt(ctx->i32,
+ S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
+
+ uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+ S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (ctx->screen->info.chip_class >= GFX10)
+ rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
+ S_008F0C_RESOURCE_LEVEL(1);
+ else
+ rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+
+ LLVMValueRef desc_elems[] = {
+ desc0,
+ desc1,
+ LLVMConstInt(ctx->i32, sel->info.constbuf0_num_slots * 16, 0),
+ LLVMConstInt(ctx->i32, rsrc3, false)
+ };
+
+ return ac_build_gather_values(&ctx->ac, desc_elems, 4);
+}
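+
+/* The four dwords gathered above form a buffer resource descriptor (V#).
+ * A sketch of the layout, derived from the fields set here:
+ *
+ *    dword 0: base address, low 32 bits (the user SGPR pointer)
+ *    dword 1: S_008F04_BASE_ADDRESS_HI(address32_hi), stride = 0
+ *    dword 2: NUM_RECORDS = buffer size in bytes (16 bytes per slot)
+ *    dword 3: DST_SEL_XYZW plus the per-generation format bits above
+ *
+ * Hardware returns zero for loads past NUM_RECORDS, which is what makes
+ * this descriptor-based bounds checking effectively free.
+ */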
+
+static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
+{
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ struct si_shader_selector *sel = ctx->shader->selector;
+
+ LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers);
+
+ if (sel->info.const_buffers_declared == 1 &&
+ sel->info.shader_buffers_declared == 0) {
+ return load_const_buffer_desc_fast_path(ctx);
+ }
+
+ index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
+ index = LLVMBuildAdd(ctx->ac.builder, index,
+ LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");
+
+ return ac_build_load_to_sgpr(&ctx->ac, ptr, index);
+}
+
+static LLVMValueRef
+load_ssbo(struct ac_shader_abi *abi, LLVMValueRef index, bool write)
+{
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ LLVMValueRef rsrc_ptr = ac_get_arg(&ctx->ac,
+ ctx->const_and_shader_buffers);
+
+ index = si_llvm_bound_index(ctx, index, ctx->num_shader_buffers);
+ index = LLVMBuildSub(ctx->ac.builder,
+ LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS - 1, 0),
+ index, "");
+
+ return ac_build_load_to_sgpr(&ctx->ac, rsrc_ptr, index);
+}
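+
+/* Both callbacks above index the same combined descriptor list; the slot
+ * layout implied by their index math is:
+ *
+ *    [0 .. SI_NUM_SHADER_BUFFERS-1]  shader buffers, stored in reverse
+ *                                    (slot i lives at NUM-1 - i)
+ *    [SI_NUM_SHADER_BUFFERS .. ]     constant buffers, in order
+ */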
+
+/**
+ * Given a 256-bit resource descriptor, force the DCC enable bit to off.
+ *
+ * At least on Tonga, executing image stores on images with non-trivial
+ * DCC enabled can eventually lead to lockups. This can occur when an
+ * application binds an image as read-only but then uses a shader that writes
+ * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
+ * program termination) in this case, but it doesn't cost much to be a bit
+ * nicer: disabling DCC in the shader still leads to undefined results but
+ * avoids the lockup.
+ */
+static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
+ LLVMValueRef rsrc)
+{
+ if (ctx->screen->info.chip_class <= GFX7) {
+ return rsrc;
+ } else {
+ LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
+ LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0);
+ LLVMValueRef tmp;
+
+ tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, "");
+ tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, "");
+ return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, "");
+ }
+}
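+
+/* In scalar terms, the extract/and/insert sequence above is simply:
+ *
+ *    desc[6] &= C_008F28_COMPRESSION_EN;
+ *
+ * The C_* constants in sid.h are the complements of the field masks, so
+ * this clears the COMPRESSION_EN field of dword 6 and leaves the rest of
+ * the descriptor untouched.
+ */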
+
+/* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should
+ * adjust "index" to point to FMASK. */
+static LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
+ LLVMValueRef list, LLVMValueRef index,
+ enum ac_descriptor_type desc_type,
+ bool uses_store, bool bindless)
+{
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef rsrc;
+
+ if (desc_type == AC_DESC_BUFFER) {
+ index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
+ ctx->i32_1);
+ list = LLVMBuildPointerCast(builder, list,
+ ac_array_in_const32_addr_space(ctx->v4i32), "");
+ } else {
+ assert(desc_type == AC_DESC_IMAGE ||
+ desc_type == AC_DESC_FMASK);
+ }
+
+ if (bindless)
+ rsrc = ac_build_load_to_sgpr_uint_wraparound(&ctx->ac, list, index);
+ else
+ rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
+
+ if (desc_type == AC_DESC_IMAGE && uses_store)
+ rsrc = force_dcc_off(ctx, rsrc);
+ return rsrc;
+}
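+
+/* Layout implied by the indexing above: the image list is an array of
+ * 8-dword (v8i32) elements, with the buffer view stored in the upper half:
+ *
+ *    dwords [0:7]  image (or FMASK) descriptor
+ *    dwords [4:7]  buffer view (v4i32 element 2*i + 1)
+ */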
+
+/**
+ * Load an image view, FMASK view, or sampler state descriptor.
+ */
+static LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
+ LLVMValueRef list, LLVMValueRef index,
+ enum ac_descriptor_type type)
+{
+ LLVMBuilderRef builder = ctx->ac.builder;
+
+ switch (type) {
+ case AC_DESC_IMAGE:
+ /* The image is at [0:7]. */
+ index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
+ break;
+ case AC_DESC_BUFFER:
+ /* The buffer is in [4:7]. */
+ index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
+ ctx->i32_1);
+ list = LLVMBuildPointerCast(builder, list,
+ ac_array_in_const32_addr_space(ctx->v4i32), "");
+ break;
+ case AC_DESC_FMASK:
+ /* The FMASK is at [8:15]. */
+ index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
+ ctx->i32_1);
+ break;
+ case AC_DESC_SAMPLER:
+ /* The sampler state is at [12:15]. */
+ index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
+ LLVMConstInt(ctx->i32, 3, 0));
+ list = LLVMBuildPointerCast(builder, list,
+ ac_array_in_const32_addr_space(ctx->v4i32), "");
+ break;
+ case AC_DESC_PLANE_0:
+ case AC_DESC_PLANE_1:
+ case AC_DESC_PLANE_2:
+ /* Only used for multi-plane image support in Vulkan. Should
+ * never be reached in radeonsi.
+ */
+ unreachable("Plane descriptor requested in radeonsi.");
+ }
+
+ return ac_build_load_to_sgpr(&ctx->ac, list, index);
+}
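+
+/* Each slot in this list is 16 dwords (two v8i32 elements); the index
+ * math in the switch above decodes the following packing:
+ *
+ *    dwords [0:7]    image descriptor
+ *    dwords [4:7]    buffer view   (overlaps the image)
+ *    dwords [8:15]   FMASK descriptor
+ *    dwords [12:15]  sampler state (overlaps the FMASK)
+ */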
+
+static LLVMValueRef
+si_nir_load_sampler_desc(struct ac_shader_abi *abi,
+ unsigned descriptor_set, unsigned base_index,
+ unsigned constant_index, LLVMValueRef dynamic_index,
+ enum ac_descriptor_type desc_type, bool image,
+ bool write, bool bindless)
+{
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ LLVMBuilderRef builder = ctx->ac.builder;
+ unsigned const_index = base_index + constant_index;
+
+ assert(!descriptor_set);
+ assert(desc_type <= AC_DESC_BUFFER);
+
+ if (bindless) {
+ LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->bindless_samplers_and_images);
+
+ /* dynamic_index is the bindless handle */
+ if (image) {
+ /* Bindless image descriptors use 16-dword slots. */
+ dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index,
+ LLVMConstInt(ctx->i64, 2, 0), "");
+ /* FMASK is right after the image. */
+ if (desc_type == AC_DESC_FMASK) {
+ dynamic_index = LLVMBuildAdd(ctx->ac.builder, dynamic_index,
+ ctx->i32_1, "");
+ }
+
+ return si_load_image_desc(ctx, list, dynamic_index, desc_type,
+ write, true);
+ }
+
+ /* Since bindless handle arithmetic can contain an unsigned integer
+ * wraparound and si_load_sampler_desc assumes there isn't any,
+ * use GEP without "inbounds" (inside ac_build_pointer_add)
+ * to prevent incorrect code generation and hangs.
+ */
+ dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index,
+ LLVMConstInt(ctx->i64, 2, 0), "");
+ list = ac_build_pointer_add(&ctx->ac, list, dynamic_index);
+ return si_load_sampler_desc(ctx, list, ctx->i32_0, desc_type);
+ }
+
+ unsigned num_slots = image ? ctx->num_images : ctx->num_samplers;
+ assert(const_index < num_slots || dynamic_index);
+
+ LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->samplers_and_images);
+ LLVMValueRef index = LLVMConstInt(ctx->ac.i32, const_index, false);
+
+ if (dynamic_index) {
+ index = LLVMBuildAdd(builder, index, dynamic_index, "");
+
+ /* From the GL_ARB_shader_image_load_store extension spec:
+ *
+ * If a shader performs an image load, store, or atomic
+ * operation using an image variable declared as an array,
+ * and if the index used to select an individual element is
+ * negative or greater than or equal to the size of the
+ * array, the results of the operation are undefined but may
+ * not lead to termination.
+ */
+ index = si_llvm_bound_index(ctx, index, num_slots);
+ }
+
+ if (image) {
+ /* FMASKs are separate from images. */
+ if (desc_type == AC_DESC_FMASK) {
+ index = LLVMBuildAdd(ctx->ac.builder, index,
+ LLVMConstInt(ctx->i32, SI_NUM_IMAGES, 0), "");
+ }
+ index = LLVMBuildSub(ctx->ac.builder,
+ LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS - 1, 0),
+ index, "");
+ return si_load_image_desc(ctx, list, index, desc_type, write, false);
+ }
+
+ index = LLVMBuildAdd(ctx->ac.builder, index,
+ LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS / 2, 0), "");
+ return si_load_sampler_desc(ctx, list, index, desc_type);
+}
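+
+/* Note on the bindless paths above: the handle in dynamic_index counts
+ * 16-dword slots, so multiplying by 2 converts it into v8i32 elements
+ * before the list is dereferenced.
+ */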
+
+void si_llvm_init_resource_callbacks(struct si_shader_context *ctx)
+{
+ ctx->abi.load_ubo = load_ubo;
+ ctx->abi.load_ssbo = load_ssbo;
+ ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;
+}
si_llvm_load_input_vs(ctx, input_index, out);
}
-static LLVMValueRef
-si_nir_load_sampler_desc(struct ac_shader_abi *abi,
- unsigned descriptor_set, unsigned base_index,
- unsigned constant_index, LLVMValueRef dynamic_index,
- enum ac_descriptor_type desc_type, bool image,
- bool write, bool bindless)
-{
- struct si_shader_context *ctx = si_shader_context_from_abi(abi);
- LLVMBuilderRef builder = ctx->ac.builder;
- unsigned const_index = base_index + constant_index;
-
- assert(!descriptor_set);
- assert(desc_type <= AC_DESC_BUFFER);
-
- if (bindless) {
- LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->bindless_samplers_and_images);
-
- /* dynamic_index is the bindless handle */
- if (image) {
- /* Bindless image descriptors use 16-dword slots. */
- dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index,
- LLVMConstInt(ctx->i64, 2, 0), "");
- /* FMASK is right after the image. */
- if (desc_type == AC_DESC_FMASK) {
- dynamic_index = LLVMBuildAdd(ctx->ac.builder, dynamic_index,
- ctx->i32_1, "");
- }
-
- return si_load_image_desc(ctx, list, dynamic_index, desc_type,
- write, true);
- }
-
- /* Since bindless handle arithmetic can contain an unsigned integer
- * wraparound and si_load_sampler_desc assumes there isn't any,
- * use GEP without "inbounds" (inside ac_build_pointer_add)
- * to prevent incorrect code generation and hangs.
- */
- dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index,
- LLVMConstInt(ctx->i64, 2, 0), "");
- list = ac_build_pointer_add(&ctx->ac, list, dynamic_index);
- return si_load_sampler_desc(ctx, list, ctx->i32_0, desc_type);
- }
-
- unsigned num_slots = image ? ctx->num_images : ctx->num_samplers;
- assert(const_index < num_slots || dynamic_index);
-
- LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->samplers_and_images);
- LLVMValueRef index = LLVMConstInt(ctx->ac.i32, const_index, false);
-
- if (dynamic_index) {
- index = LLVMBuildAdd(builder, index, dynamic_index, "");
-
- /* From the GL_ARB_shader_image_load_store extension spec:
- *
- * If a shader performs an image load, store, or atomic
- * operation using an image variable declared as an array,
- * and if the index used to select an individual element is
- * negative or greater than or equal to the size of the
- * array, the results of the operation are undefined but may
- * not lead to termination.
- */
- index = si_llvm_bound_index(ctx, index, num_slots);
- }
-
- if (image) {
- /* FMASKs are separate from images. */
- if (desc_type == AC_DESC_FMASK) {
- index = LLVMBuildAdd(ctx->ac.builder, index,
- LLVMConstInt(ctx->i32, SI_NUM_IMAGES, 0), "");
- }
- index = LLVMBuildSub(ctx->ac.builder,
- LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS - 1, 0),
- index, "");
- return si_load_image_desc(ctx, list, index, desc_type, write, false);
- }
-
- index = LLVMBuildAdd(ctx->ac.builder, index,
- LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS / 2, 0), "");
- return si_load_sampler_desc(ctx, list, index, desc_type);
-}
-
static void bitcast_inputs(struct si_shader_context *ctx,
LLVMValueRef data[4],
unsigned input_idx)
}
ctx->abi.inputs = &ctx->inputs[0];
- ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;
ctx->abi.clamp_shadow_reference = true;
ctx->abi.robust_buffer_access = true;