#include "si_pipe.h"
#include "si_compute.h"
+#include "si_build_pm4.h"
#include "sid.h"
#include "util/format/u_format.h"
#include "util/hash_table.h"
return true;
}
-static void si_descriptors_begin_new_cs(struct si_context *sctx, struct si_descriptors *desc)
+static void
+si_add_descriptors_to_bo_list(struct si_context *sctx, struct si_descriptors *desc)
{
if (!desc->buffer)
return;
state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.stencil.swizzle_mode);
state[4] |= S_008F20_PITCH(tex->surface.u.gfx9.stencil.epitch);
} else {
+ uint16_t epitch = tex->surface.u.gfx9.surf.epitch;
+ if (tex->buffer.b.b.format == PIPE_FORMAT_R8G8_R8B8_UNORM &&
+ block_width == 1) {
+            /* epitch is patched in ac_surface for sdma/vcn blocks to get
+             * a value expressed in element units.
+             * But here the texture is used with block_width == 1, so we
+             * need epitch in pixel units.
+             */
+ epitch = (epitch + 1) / tex->surface.blk_w - 1;
+ }
state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode);
- state[4] |= S_008F20_PITCH(tex->surface.u.gfx9.surf.epitch);
+ state[4] |= S_008F20_PITCH(epitch);
}
state[5] &=
samplers->needs_color_decompress_mask &= ~(1u << slot);
}
- if (tex->surface.dcc_offset && p_atomic_read(&tex->framebuffers_bound))
+ if (vi_dcc_enabled(tex, view->u.tex.first_level) &&
+ p_atomic_read(&tex->framebuffers_bound))
sctx->need_check_render_feedback = true;
}
si_set_shader_image(ctx, shader, slot, NULL, false);
}
+ if (shader == PIPE_SHADER_COMPUTE &&
+ ctx->cs_shader_state.program &&
+ start_slot < ctx->cs_shader_state.program->sel.cs_num_images_in_user_sgprs)
+ ctx->compute_image_sgprs_dirty = true;
+
si_update_shader_needs_decompress_mask(ctx, shader);
}
pipe_resource_reference(&buffers->buffers[slot], &tex->buffer.b.b);
radeon_add_to_buffer_list(sctx, sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READ,
RADEON_PRIO_SHADER_RW_IMAGE);
- buffers->enabled_mask |= 1u << slot;
+ buffers->enabled_mask |= 1llu << slot;
} else {
/* Clear the descriptor. */
memset(descs->list + slot * 4, 0, 8 * 4);
pipe_resource_reference(&buffers->buffers[slot], NULL);
- buffers->enabled_mask &= ~(1u << slot);
+ buffers->enabled_mask &= ~(1llu << slot);
}
sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
struct si_buffer_resources *buffers)
{
- unsigned mask = buffers->enabled_mask;
+ uint64_t mask = buffers->enabled_mask;
/* Add buffers to the CS. */
while (mask) {
- int i = u_bit_scan(&mask);
+ int i = u_bit_scan64(&mask);
radeon_add_to_buffer_list(
sctx, sctx->gfx_cs, si_resource(buffers->buffers[i]),
- buffers->writable_mask & (1u << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ,
+ buffers->writable_mask & (1llu << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ,
i < SI_NUM_SHADER_BUFFERS ? buffers->priority : buffers->priority_constbuf);
}
}
static bool si_buffer_resources_check_encrypted(struct si_context *sctx,
struct si_buffer_resources *buffers)
{
- unsigned mask = buffers->enabled_mask;
+ uint64_t mask = buffers->enabled_mask;
while (mask) {
- int i = u_bit_scan(&mask);
+ int i = u_bit_scan64(&mask);
/* only check for reads */
- if ((buffers->writable_mask & (1u << i)) == 0 &&
+ if ((buffers->writable_mask & (1llu << i)) == 0 &&
(si_resource(buffers->buffers[i])->flags & RADEON_FLAG_ENCRYPTED))
return true;
}
buffers->offsets[slot] = buffer_offset;
radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_READ,
buffers->priority_constbuf, true);
- buffers->enabled_mask |= 1u << slot;
+ buffers->enabled_mask |= 1llu << slot;
} else {
/* Clear the descriptor. */
memset(descs->list + slot * 4, 0, sizeof(uint32_t) * 4);
- buffers->enabled_mask &= ~(1u << slot);
+ buffers->enabled_mask &= ~(1llu << slot);
}
sctx->descriptors_dirty |= 1u << descriptors_idx;
if (!sbuffer || !sbuffer->buffer) {
pipe_resource_reference(&buffers->buffers[slot], NULL);
memset(desc, 0, sizeof(uint32_t) * 4);
- buffers->enabled_mask &= ~(1u << slot);
- buffers->writable_mask &= ~(1u << slot);
+ buffers->enabled_mask &= ~(1llu << slot);
+ buffers->writable_mask &= ~(1llu << slot);
sctx->descriptors_dirty |= 1u << descriptors_idx;
return;
}
radeon_add_to_gfx_buffer_list_check_mem(
sctx, buf, writable ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, priority, true);
if (writable)
- buffers->writable_mask |= 1u << slot;
+ buffers->writable_mask |= 1llu << slot;
else
- buffers->writable_mask &= ~(1u << slot);
+ buffers->writable_mask &= ~(1llu << slot);
- buffers->enabled_mask |= 1u << slot;
- sctx->descriptors_dirty |= 1u << descriptors_idx;
+ buffers->enabled_mask |= 1llu << slot;
+ sctx->descriptors_dirty |= 1lu << descriptors_idx;
util_range_add(&buf->b.b, &buf->valid_buffer_range, sbuffer->buffer_offset,
sbuffer->buffer_offset + sbuffer->buffer_size);
assert(start_slot + count <= SI_NUM_SHADER_BUFFERS);
+ if (shader == PIPE_SHADER_COMPUTE &&
+ sctx->cs_shader_state.program &&
+ start_slot < sctx->cs_shader_state.program->sel.cs_num_shaderbufs_in_user_sgprs)
+ sctx->compute_shaderbuf_sgprs_dirty = true;
+
for (i = 0; i < count; ++i) {
const struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL;
unsigned slot = si_get_shaderbuf_slot(start_slot + i);
pipe_resource_reference(&buffers->buffers[slot], buffer);
radeon_add_to_buffer_list(sctx, sctx->gfx_cs, si_resource(buffer), RADEON_USAGE_READWRITE,
buffers->priority);
- buffers->enabled_mask |= 1u << slot;
+ buffers->enabled_mask |= 1llu << slot;
} else {
/* Clear the descriptor. */
memset(descs->list + slot * 4, 0, sizeof(uint32_t) * 4);
- buffers->enabled_mask &= ~(1u << slot);
+ buffers->enabled_mask &= ~(1llu << slot);
}
sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
* If buf == NULL, reset all descriptors.
*/
static void si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers,
- unsigned descriptors_idx, unsigned slot_mask,
+ unsigned descriptors_idx, uint64_t slot_mask,
struct pipe_resource *buf, enum radeon_bo_priority priority)
{
struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
- unsigned mask = buffers->enabled_mask & slot_mask;
+ uint64_t mask = buffers->enabled_mask & slot_mask;
while (mask) {
- unsigned i = u_bit_scan(&mask);
+ unsigned i = u_bit_scan64(&mask);
struct pipe_resource *buffer = buffers->buffers[i];
if (buffer && (!buf || buffer == buf)) {
radeon_add_to_gfx_buffer_list_check_mem(
sctx, si_resource(buffer),
- buffers->writable_mask & (1u << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ,
+ buffers->writable_mask & (1llu << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ,
priority, true);
}
}
for (shader = 0; shader < SI_NUM_SHADERS; shader++)
si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
si_const_and_shader_buffer_descriptors_idx(shader),
- u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
+ u_bit_consecutive64(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
buf, sctx->const_and_shader_buffers[shader].priority_constbuf);
}
for (shader = 0; shader < SI_NUM_SHADERS; shader++)
si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
si_const_and_shader_buffer_descriptors_idx(shader),
- u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS), buf,
+ u_bit_consecutive64(0, SI_NUM_SHADER_BUFFERS), buf,
sctx->const_and_shader_buffers[shader].priority);
}
si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
}
-static void si_shader_pointers_begin_new_cs(struct si_context *sctx)
+void si_shader_pointers_mark_dirty(struct si_context *sctx)
{
sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL;
si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
sctx->graphics_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
sctx->compute_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
+ sctx->compute_shaderbuf_sgprs_dirty = true;
+ sctx->compute_image_sgprs_dirty = true;
}
/* Set a base register address for user data constants in the given shader.
static void si_emit_shader_pointer_head(struct radeon_cmdbuf *cs, unsigned sh_offset,
unsigned pointer_count)
{
+ SI_CHECK_SHADOWED_REGS(sh_offset, pointer_count);
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count, 0));
radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2);
}
si_emit_shader_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0);
si_emit_shader_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0);
return;
+ } else if (sctx->chip_class == GFX9 && sctx->shadowed_regs) {
+ /* We can't use the COMMON registers with register shadowing. */
+ si_emit_shader_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
+ si_emit_shader_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
+ si_emit_shader_pointer(sctx, descs, R_00B330_SPI_SHADER_USER_DATA_ES_0);
+ si_emit_shader_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_LS_0);
+ return;
} else if (sctx->chip_class == GFX9) {
/* Broadcast it to all shader stages. */
si_emit_shader_pointer(sctx, descs, R_00B530_SPI_SHADER_USER_DATA_COMMON_0);
void si_emit_compute_shader_pointers(struct si_context *sctx)
{
+ struct radeon_cmdbuf *cs = sctx->gfx_cs;
+ struct si_shader_selector *shader = &sctx->cs_shader_state.program->sel;
unsigned base = R_00B900_COMPUTE_USER_DATA_0;
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(COMPUTE),
si_emit_shader_pointer(sctx, &sctx->bindless_descriptors, base);
sctx->compute_bindless_pointer_dirty = false;
}
+
+ /* Set shader buffer descriptors in user SGPRs. */
+ unsigned num_shaderbufs = shader->cs_num_shaderbufs_in_user_sgprs;
+ if (num_shaderbufs && sctx->compute_shaderbuf_sgprs_dirty) {
+ struct si_descriptors *desc = si_const_and_shader_buffer_descriptors(sctx, PIPE_SHADER_COMPUTE);
+
+ si_emit_shader_pointer_head(cs, R_00B900_COMPUTE_USER_DATA_0 +
+ shader->cs_shaderbufs_sgpr_index * 4,
+ num_shaderbufs * 4);
+
+ for (unsigned i = 0; i < num_shaderbufs; i++)
+ radeon_emit_array(cs, &desc->list[si_get_shaderbuf_slot(i) * 4], 4);
+
+ sctx->compute_shaderbuf_sgprs_dirty = false;
+ }
+
+ /* Set image descriptors in user SGPRs. */
+ unsigned num_images = shader->cs_num_images_in_user_sgprs;
+ if (num_images && sctx->compute_image_sgprs_dirty) {
+ struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, PIPE_SHADER_COMPUTE);
+
+ si_emit_shader_pointer_head(cs, R_00B900_COMPUTE_USER_DATA_0 +
+ shader->cs_images_sgpr_index * 4,
+ shader->cs_images_num_sgprs);
+
+ for (unsigned i = 0; i < num_images; i++) {
+ unsigned desc_offset = si_get_image_slot(i) * 8;
+ unsigned num_sgprs = 8;
+
+ /* Image buffers are in desc[4..7]. */
+ if (shader->info.image_buffers & (1 << i)) {
+ desc_offset += 4;
+ num_sgprs = 4;
+ }
+
+ radeon_emit_array(cs, &desc->list[desc_offset], num_sgprs);
+ }
+
+ sctx->compute_image_sgprs_dirty = false;
+ }
}
/* BINDLESS */
struct si_texture_handle *, tex_handle);
}
- if (tex->surface.dcc_offset && p_atomic_read(&tex->framebuffers_bound))
+ if (vi_dcc_enabled(tex, sview->base.u.tex.first_level) &&
+ p_atomic_read(&tex->framebuffers_bound))
sctx->need_check_render_feedback = true;
si_update_bindless_texture_descriptor(sctx, tex_handle);
sctx->bo_list_add_all_compute_resources = false;
}
-void si_all_descriptors_begin_new_cs(struct si_context *sctx)
+void si_add_all_descriptors_to_bo_list(struct si_context *sctx)
{
for (unsigned i = 0; i < SI_NUM_DESCS; ++i)
- si_descriptors_begin_new_cs(sctx, &sctx->descriptors[i]);
- si_descriptors_begin_new_cs(sctx, &sctx->bindless_descriptors);
-
- si_shader_pointers_begin_new_cs(sctx);
+ si_add_descriptors_to_bo_list(sctx, &sctx->descriptors[i]);
+ si_add_descriptors_to_bo_list(sctx, &sctx->bindless_descriptors);
sctx->bo_list_add_all_resident_resources = true;
sctx->bo_list_add_all_gfx_resources = true;
if (!sel)
return;
- si_set_active_descriptors(sctx, si_const_and_shader_buffer_descriptors_idx(sel->type),
+ si_set_active_descriptors(sctx, sel->const_and_shader_buf_descriptors_index,
sel->active_const_and_shader_buffers);
- si_set_active_descriptors(sctx, si_sampler_and_image_descriptors_idx(sel->type),
+ si_set_active_descriptors(sctx, sel->sampler_and_images_descriptors_index,
sel->active_samplers_and_images);
}