struct si_images *images = &ctx->images[shader];
struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
struct si_resource *res;
- unsigned desc_slot = si_get_image_slot(slot);
- uint32_t *desc = descs->list + desc_slot * 8;
if (!view || !view->resource) {
si_disable_shader_image(ctx, shader, slot);
if (&images->views[slot] != view)
util_copy_image_view(&images->views[slot], view);
- si_set_shader_image_desc(ctx, view, skip_decompress, desc, NULL);
+ si_set_shader_image_desc(ctx, view, skip_decompress,
+ descs->list + si_get_image_slot(slot) * 8,
+ descs->list + si_get_image_slot(slot + SI_NUM_IMAGES) * 8);
if (res->b.b.target == PIPE_BUFFER ||
view->shader_access & SI_IMAGE_ACCESS_AS_BUFFER) {
struct si_descriptors *desc = &sctx->bindless_descriptors;
unsigned desc_slot_offset = img_handle->desc_slot * 16;
struct pipe_image_view *view = &img_handle->view;
- uint32_t desc_list[8];
+ struct pipe_resource *res = view->resource;
+ uint32_t image_desc[16];
+ unsigned desc_size = (res->nr_samples >= 2 ? 16 : 8) * 4;
- if (view->resource->target == PIPE_BUFFER)
+ if (res->target == PIPE_BUFFER)
return;
- memcpy(desc_list, desc->list + desc_slot_offset,
- sizeof(desc_list));
+ memcpy(image_desc, desc->list + desc_slot_offset, desc_size);
si_set_shader_image_desc(sctx, view, true,
- desc->list + desc_slot_offset, NULL);
+ desc->list + desc_slot_offset,
+ desc->list + desc_slot_offset + 8);
- if (memcmp(desc_list, desc->list + desc_slot_offset,
- sizeof(desc_list))) {
+ if (memcmp(image_desc, desc->list + desc_slot_offset, desc_size)) {
img_handle->desc_dirty = true;
sctx->bindless_descriptors_dirty = true;
}
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_image_handle *img_handle;
- uint32_t desc_list[8];
+ uint32_t desc_list[16];
uint64_t handle;
if (!view || !view->resource)
return 0;
memset(desc_list, 0, sizeof(desc_list));
- si_init_descriptor_list(&desc_list[0], 8, 1, null_image_descriptor);
+ si_init_descriptor_list(&desc_list[0], 8, 2, null_image_descriptor);
- si_set_shader_image_desc(sctx, view, false, &desc_list[0], NULL);
+ si_set_shader_image_desc(sctx, view, false, &desc_list[0], &desc_list[8]);
img_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list,
sizeof(desc_list));
bool is_2nd = sctx->chip_class >= GFX9 &&
(i == PIPE_SHADER_TESS_CTRL ||
i == PIPE_SHADER_GEOMETRY);
- unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS;
+ unsigned num_sampler_slots = SI_NUM_IMAGE_SLOTS / 2 + SI_NUM_SAMPLERS;
unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS;
int rel_dw_offset;
struct si_descriptors *desc;
si_init_descriptors(desc, rel_dw_offset, 16, num_sampler_slots);
int j;
- for (j = 0; j < SI_NUM_IMAGES; j++)
+ for (j = 0; j < SI_NUM_IMAGE_SLOTS; j++)
memcpy(desc->list + j * 8, null_image_descriptor, 8 * 4);
- for (; j < SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2; j++)
+ for (; j < SI_NUM_IMAGE_SLOTS + SI_NUM_SAMPLERS * 2; j++)
memcpy(desc->list + j * 8, null_texture_descriptor, 8 * 4);
}
if (image) {
index = LLVMBuildSub(ctx->ac.builder,
- LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0),
+ LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS - 1, 0),
index, "");
return si_load_image_desc(ctx, list, index, desc_type, write, false);
}
index = LLVMBuildAdd(ctx->ac.builder, index,
- LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), "");
+ LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS / 2, 0), "");
return si_load_sampler_desc(ctx, list, index, desc_type);
}
image->Register.Index,
ctx->num_images);
index = LLVMBuildSub(ctx->ac.builder,
- LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0),
+ LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS - 1, 0),
index, "");
}
reg->Register.Index,
ctx->num_samplers);
index = LLVMBuildAdd(ctx->ac.builder, index,
- LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), "");
+ LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS / 2, 0), "");
} else {
index = LLVMConstInt(ctx->i32,
si_get_sampler_slot(reg->Register.Index), 0);
#define SI_NUM_SAMPLERS 32 /* OpenGL textures units per shader */
#define SI_NUM_CONST_BUFFERS 16
#define SI_NUM_IMAGES 16
+#define SI_NUM_IMAGE_SLOTS (SI_NUM_IMAGES * 2) /* image indices [0..SI_NUM_IMAGES-1] are images; the second half, [SI_NUM_IMAGES..SI_NUM_IMAGE_SLOTS-1], are their FMASK slots */
#define SI_NUM_SHADER_BUFFERS 16
struct si_screen;
static inline unsigned si_get_sampler_slot(unsigned slot)
{
- /* samplers are in slots [8..39], ascending */
- return SI_NUM_IMAGES / 2 + slot;
+ /* maps sampler index "slot" to its sampler slot: the SI_NUM_IMAGE_SLOTS image slots take up the first SI_NUM_IMAGE_SLOTS / 2 sampler slots, so the 32 samplers are in sampler slots [16..47], 16 dw per slot, ascending */
+ /* those are equivalent to image slots [32..95], 8 dw per slot, ascending (each sampler slot spans two image slots) */
+ return SI_NUM_IMAGE_SLOTS / 2 + slot;
}
static inline unsigned si_get_image_slot(unsigned slot)
{
- /* images are in slots [15..0] (sampler slots [7..0]), descending */
- return SI_NUM_IMAGES - 1 - slot;
+ /* maps image index "slot" to its 8-dw image slot, descending: index 0 -> slot 31, index 31 -> slot 0; image slots are in [31..0] (sampler slots [15..0]) */
+ /* images (index < SI_NUM_IMAGES) are in slots [31..16], while FMASKs (image index + SI_NUM_IMAGES) are in slots [15..0] */
+ return SI_NUM_IMAGE_SLOTS - 1 - slot;
}
#endif
uint32_t *const_and_shader_buffers,
uint64_t *samplers_and_images)
{
- unsigned start, num_shaderbufs, num_constbufs, num_images, num_samplers;
+ unsigned start, num_shaderbufs, num_constbufs, num_images, num_msaa_images, num_samplers;
num_shaderbufs = util_last_bit(info->shader_buffers_declared);
num_constbufs = util_last_bit(info->const_buffers_declared);
/* two 8-byte images share one 16-byte slot */
num_images = align(util_last_bit(info->images_declared), 2);
+ num_msaa_images = align(util_last_bit(info->msaa_images_declared), 2);
num_samplers = util_last_bit(info->samplers_declared);
/* The layout is: sb[last] ... sb[0], cb[0] ... cb[last] */
*const_and_shader_buffers =
u_bit_consecutive(start, num_shaderbufs + num_constbufs);
- /* The layout is: image[last] ... image[0], sampler[0] ... sampler[last] */
+ /* The layout is:
+ * - fmask[last] ... fmask[0] go to [15-last .. 15]
+ * - image[last] ... image[0] go to [31-last .. 31]
+ * - sampler[0] ... sampler[last] go to [32 .. 32+last*2]
+ *
+ * FMASKs for images are placed separately, because MSAA images are rare,
+ * and so we can benefit from a better cache hit rate if we keep image
+ * descriptors together.
+ */
+ if (num_msaa_images)
+ num_images = SI_NUM_IMAGES + num_msaa_images; /* add FMASK descriptors */
+
start = si_get_image_slot(num_images - 1) / 2;
*samplers_and_images =
u_bit_consecutive64(start, num_images / 2 + num_samplers);