#include "si_pipe.h"
#include "sid.h"
-#include "gfx9d.h"
#include "util/hash_table.h"
#include "util/u_idalloc.h"
if (sscreen->info.chip_class >= GFX8) {
state[6] &= C_008F28_COMPRESSION_EN;
- state[7] = 0;
if (vi_dcc_enabled(tex, first_level)) {
meta_va = (!tex->dcc_separate_buffer ? tex->buffer.gpu_address : 0) +
- tex->dcc_offset;
+ tex->surface.dcc_offset;
if (sscreen->info.chip_class == GFX8) {
meta_va += base_level_info->dcc_offset;
assert(base_level_info->mode == RADEON_SURF_MODE_2D);
}
- meta_va |= (uint32_t)tex->surface.tile_swizzle << 8;
- } else if (vi_tc_compat_htile_enabled(tex, first_level)) {
- meta_va = tex->buffer.gpu_address + tex->htile_offset;
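+ /* Fold the tile swizzle into the low bits of the DCC address.
+ * It must stay below the DCC alignment, hence the mask.
+ */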
+ unsigned dcc_tile_swizzle = tex->surface.tile_swizzle << 8;
+ dcc_tile_swizzle &= tex->surface.dcc_alignment - 1;
+ meta_va |= dcc_tile_swizzle;
+ } else if (vi_tc_compat_htile_enabled(tex, first_level,
+ is_stencil ? PIPE_MASK_S : PIPE_MASK_Z)) {
+ meta_va = tex->buffer.gpu_address + tex->surface.htile_offset;
}
- if (meta_va) {
+ if (meta_va)
state[6] |= S_008F28_COMPRESSION_EN(1);
- state[7] = meta_va >> 8;
- }
}
- if (sscreen->info.chip_class >= GFX9) {
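+ /* On GFX8-GFX9, dword 7 holds the whole 256B-aligned metadata address. */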
+ if (sscreen->info.chip_class >= GFX8 && sscreen->info.chip_class <= GFX9)
+ state[7] = meta_va >> 8;
+
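+ /* GFX10 rearranged the descriptor: SW_MODE and the metadata address
+ * fields live at different offsets than on GFX9.
+ */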
+ if (sscreen->info.chip_class >= GFX10) {
+ state[3] &= C_00A00C_SW_MODE;
+
+ if (is_stencil) {
+ state[3] |= S_00A00C_SW_MODE(tex->surface.u.gfx9.stencil.swizzle_mode);
+ } else {
+ state[3] |= S_00A00C_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode);
+ }
+
+ state[6] &= C_00A018_META_DATA_ADDRESS_LO &
+ C_00A018_META_PIPE_ALIGNED;
+
+ if (meta_va) {
+ struct gfx9_surf_meta_flags meta;
+
+ if (tex->surface.dcc_offset)
+ meta = tex->surface.u.gfx9.dcc;
+ else
+ meta = tex->surface.u.gfx9.htile;
+
+ state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
+ S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
+ }
+
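+ /* Dword 6 took the low bits of the metadata address above; dword 7
+ * holds the remaining high bits.
+ */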
+ state[7] = meta_va >> 16;
+ } else if (sscreen->info.chip_class == GFX9) {
state[3] &= C_008F1C_SW_MODE;
state[4] &= C_008F20_PITCH;
if (meta_va) {
struct gfx9_surf_meta_flags meta;
- if (tex->dcc_offset)
+ if (tex->surface.dcc_offset)
meta = tex->surface.u.gfx9.dcc;
else
meta = tex->surface.u.gfx9.htile;
static bool color_needs_decompression(struct si_texture *tex)
{
return tex->surface.fmask_size ||
(tex->dirty_level_mask &&
- (tex->cmask_buffer || tex->dcc_offset));
+ (tex->cmask_buffer || tex->surface.dcc_offset));
}
static bool depth_needs_decompression(struct si_texture *tex)
samplers->needs_color_decompress_mask &= ~(1u << slot);
}
- if (tex->dcc_offset &&
+ if (tex->surface.dcc_offset &&
p_atomic_read(&tex->framebuffers_bound))
sctx->need_check_render_feedback = true;
}
if (res->b.b.target != PIPE_BUFFER)
return;
- util_range_add(&res->valid_buffer_range,
+ util_range_add(&res->b.b, &res->valid_buffer_range,
view->u.buf.offset,
view->u.buf.offset + view->u.buf.size);
}
bool uses_dcc = vi_dcc_enabled(tex, level);
unsigned access = view->access;
- /* Clear the write flag when writes can't occur.
- * Note that DCC_DECOMPRESS for MSAA doesn't work in some cases,
- * so we don't wanna trigger it.
- */
- if (tex->is_depth ||
- (!fmask_desc && tex->surface.fmask_size != 0)) {
- assert(!"Z/S and MSAA image stores are not supported");
- access &= ~PIPE_IMAGE_ACCESS_WRITE;
- }
-
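+ /* Z/S and MSAA image stores are not supported; callers are expected
+ * to have filtered them out already.
+ */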
assert(!tex->is_depth);
- assert(fmask_desc || tex->surface.fmask_size == 0);
+ assert(fmask_desc || tex->surface.fmask_offset == 0);
if (uses_dcc && !skip_decompress &&
- (view->access & PIPE_IMAGE_ACCESS_WRITE ||
- !vi_dcc_formats_compatible(res->b.b.format, view->format))) {
+ (access & PIPE_IMAGE_ACCESS_WRITE ||
+ !vi_dcc_formats_compatible(screen, res->b.b.format, view->format))) {
/* If DCC can't be disabled, at least decompress it.
* The decompression is relatively cheap if the surface
* has been decompressed already.
hw_level = 0;
}
- si_make_texture_descriptor(screen, tex,
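+ /* The texture descriptor layout is generation-specific now, so build
+ * it through the per-screen hook.
+ */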
+ screen->make_texture_descriptor(screen, tex,
false, res->b.b.target,
view->format, swizzle,
hw_level, hw_level,
struct si_images *images = &ctx->images[shader];
struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
struct si_resource *res;
- unsigned desc_slot = si_get_image_slot(slot);
- uint32_t *desc = descs->list + desc_slot * 8;
if (!view || !view->resource) {
si_disable_shader_image(ctx, shader, slot);
if (&images->views[slot] != view)
util_copy_image_view(&images->views[slot], view);
- si_set_shader_image_desc(ctx, view, skip_decompress, desc, NULL);
+ si_set_shader_image_desc(ctx, view, skip_decompress,
+ descs->list + si_get_image_slot(slot) * 8,
+ descs->list + si_get_image_slot(slot + SI_NUM_IMAGES) * 8);
if (res->b.b.target == PIPE_BUFFER ||
view->shader_access & SI_IMAGE_ACCESS_AS_BUFFER) {
/* See whether FBFETCH is used and color buffer 0 is set. */
if (sctx->ps_shader.cso &&
- sctx->ps_shader.cso->info.opcode_count[TGSI_OPCODE_FBFETCH] &&
+ sctx->ps_shader.cso->info.uses_fbfetch &&
sctx->framebuffer.state.nr_cbufs &&
sctx->framebuffer.state.cbufs[0])
surf = sctx->framebuffer.state.cbufs[0];
int64_t offset = (int64_t)((int)vb->buffer_offset) +
velems->src_offset[i];
+
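+ /* A stale binding can push the computed offset past the end of the
+ * buffer: complain in debug builds and emit a null descriptor in
+ * release builds.
+ */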
+ if (offset >= buf->b.b.width0) {
+ assert(offset < buf->b.b.width0);
+ memset(desc, 0, 16);
+ continue;
+ }
+
uint64_t va = buf->gpu_address + offset;
int64_t num_records = (int64_t)buf->b.b.width0 - offset;
}
assert(num_records >= 0 && num_records <= UINT_MAX);
+ uint32_t rsrc_word3 = velems->rsrc_word3[i];
+
+ /* OOB_SELECT chooses the out-of-bounds check:
+ * - 1: index >= NUM_RECORDS (Structured)
+ * - 3: offset >= NUM_RECORDS (Raw)
+ */
+ if (sctx->chip_class >= GFX10)
+ rsrc_word3 |= S_008F0C_OOB_SELECT(vb->stride ? 1 : 3);
+
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
S_008F04_STRIDE(vb->stride);
desc[2] = num_records;
- desc[3] = velems->rsrc_word3[i];
+ desc[3] = rsrc_word3;
if (first_vb_use_mask & (1 << i)) {
radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
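+ /* GFX10 merges NUM_FORMAT/DATA_FORMAT into a single FORMAT field and
+ * adds explicit out-of-bounds (OOB_SELECT) and RESOURCE_LEVEL bits.
+ */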
+ if (sctx->chip_class >= GFX10) {
+ desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(3) |
+ S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
buffers->buffers[slot] = buffer;
buffers->offsets[slot] = buffer_offset;
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (sctx->chip_class >= GFX10) {
+ desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(3) |
+ S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
pipe_resource_reference(&buffers->buffers[slot], &buf->b.b);
buffers->offsets[slot] = sbuffer->buffer_offset;
buffers->enabled_mask |= 1u << slot;
sctx->descriptors_dirty |= 1u << descriptors_idx;
- util_range_add(&buf->valid_buffer_range, sbuffer->buffer_offset,
+ util_range_add(&buf->b.b, &buf->valid_buffer_range, sbuffer->buffer_offset,
sbuffer->buffer_offset + sbuffer->buffer_size);
}
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
S_008F0C_INDEX_STRIDE(index_stride) |
S_008F0C_ADD_TID_ENABLE(add_tid);
else
desc[3] |= S_008F0C_ELEMENT_SIZE(element_size);
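+ /* OOB_SELECT=2 effectively disables the bounds check (only
+ * NUM_RECORDS == 0 is out of bounds), which these swizzled
+ * internal ring buffers rely on.
+ */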
+ if (sctx->chip_class >= GFX10) {
+ desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(2) |
+ S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+
pipe_resource_reference(&buffers->buffers[slot], buffer);
radeon_add_to_buffer_list(sctx, sctx->gfx_cs,
si_resource(buffer),
*/
sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
SI_CONTEXT_CS_PARTIAL_FLUSH;
- si_emit_cache_flush(sctx);
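+ /* Cache flushing differs on GFX10, so call through the per-context hook. */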
+ sctx->emit_cache_flush(sctx);
util_dynarray_foreach(&sctx->resident_tex_handles,
struct si_texture_handle *, tex_handle) {
}
/* Invalidate L1 because it doesn't know that L2 changed. */
- sctx->flags |= SI_CONTEXT_INV_SMEM_L1;
- si_emit_cache_flush(sctx);
+ sctx->flags |= SI_CONTEXT_INV_SCACHE;
+ sctx->emit_cache_flush(sctx);
sctx->bindless_descriptors_dirty = false;
}
struct si_descriptors *desc = &sctx->bindless_descriptors;
unsigned desc_slot_offset = img_handle->desc_slot * 16;
struct pipe_image_view *view = &img_handle->view;
- uint32_t desc_list[8];
+ struct pipe_resource *res = view->resource;
+ uint32_t image_desc[16];
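+ /* MSAA images carry a second 8-dword FMASK descriptor, so compare all
+ * 16 dwords in that case.
+ */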
+ unsigned desc_size = (res->nr_samples >= 2 ? 16 : 8) * 4;
- if (view->resource->target == PIPE_BUFFER)
+ if (res->target == PIPE_BUFFER)
return;
- memcpy(desc_list, desc->list + desc_slot_offset,
- sizeof(desc_list));
+ memcpy(image_desc, desc->list + desc_slot_offset, desc_size);
si_set_shader_image_desc(sctx, view, true,
- desc->list + desc_slot_offset, NULL);
+ desc->list + desc_slot_offset,
+ desc->list + desc_slot_offset + 8);
- if (memcmp(desc_list, desc->list + desc_slot_offset,
- sizeof(desc_list))) {
+ if (memcmp(image_desc, desc->list + desc_slot_offset, desc_size)) {
img_handle->desc_dirty = true;
sctx->bindless_descriptors_dirty = true;
}
}
}
-/* This must be called when these shaders are changed from non-NULL to NULL
- * and vice versa:
+/* This must be called when any of the following is changed between enabled and disabled:
* - geometry shader
- * - tessellation control shader
* - tessellation evaluation shader
+ * - NGG
*/
void si_shader_change_notify(struct si_context *sctx)
{
/* VS can be bound as VS, ES, or LS. */
if (sctx->tes_shader.cso) {
- if (sctx->chip_class >= GFX9) {
+ if (sctx->chip_class >= GFX10) {
+ si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
+ R_00B430_SPI_SHADER_USER_DATA_HS_0);
+ } else if (sctx->chip_class == GFX9) {
si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
R_00B430_SPI_SHADER_USER_DATA_LS_0);
} else {
si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
R_00B530_SPI_SHADER_USER_DATA_LS_0);
}
+ } else if (sctx->chip_class >= GFX10) {
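+ /* On GFX10 the VS is merged into either the legacy GS or the NGG
+ * shader, both of which use the GS user-data registers.
+ */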
+ if (sctx->ngg || sctx->gs_shader.cso) {
+ si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
+ R_00B230_SPI_SHADER_USER_DATA_GS_0);
+ } else {
+ si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
+ R_00B130_SPI_SHADER_USER_DATA_VS_0);
+ }
} else if (sctx->gs_shader.cso) {
si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
R_00B330_SPI_SHADER_USER_DATA_ES_0);
/* TES can be bound as ES, VS, or not bound. */
if (sctx->tes_shader.cso) {
- if (sctx->gs_shader.cso)
+ if (sctx->chip_class >= GFX10) {
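+ /* Same merging rules as for the VS above. */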
+ if (sctx->ngg || sctx->gs_shader.cso) {
+ si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
+ R_00B230_SPI_SHADER_USER_DATA_GS_0);
+ } else {
+ si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
+ R_00B130_SPI_SHADER_USER_DATA_VS_0);
+ }
+ } else if (sctx->gs_shader.cso) {
si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
R_00B330_SPI_SHADER_USER_DATA_ES_0);
- else
+ } else {
si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
R_00B130_SPI_SHADER_USER_DATA_VS_0);
+ }
} else {
si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL, 0);
}
static void si_emit_global_shader_pointers(struct si_context *sctx,
struct si_descriptors *descs)
{
- if (sctx->chip_class == GFX9) {
+ if (sctx->chip_class >= GFX10) {
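+ /* GFX10 has no broadcast register like GFX9's COMMON_0, so write the
+ * pointer for each stage that can be active.
+ */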
+ si_emit_shader_pointer(sctx, descs,
+ R_00B030_SPI_SHADER_USER_DATA_PS_0);
+ /* HW VS stage only used in non-NGG mode. */
+ si_emit_shader_pointer(sctx, descs,
+ R_00B130_SPI_SHADER_USER_DATA_VS_0);
+ si_emit_shader_pointer(sctx, descs,
+ R_00B230_SPI_SHADER_USER_DATA_GS_0);
+ si_emit_shader_pointer(sctx, descs,
+ R_00B430_SPI_SHADER_USER_DATA_HS_0);
+ return;
+ } else if (sctx->chip_class == GFX9) {
/* Broadcast it to all shader stages. */
si_emit_shader_pointer(sctx, descs,
R_00B530_SPI_SHADER_USER_DATA_COMMON_0);
short shader_userdata_rel_index,
unsigned num_elements)
{
- MAYBE_UNUSED unsigned desc_slot;
+ ASSERTED unsigned desc_slot;
si_init_descriptors(desc, shader_userdata_rel_index, 16, num_elements);
sctx->bindless_descriptors.num_active_slots = num_elements;
tex_handle);
}
- if (tex->dcc_offset &&
+ if (tex->surface.dcc_offset &&
p_atomic_read(&tex->framebuffers_bound))
sctx->need_check_render_feedback = true;
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_image_handle *img_handle;
- uint32_t desc_list[8];
+ uint32_t desc_list[16];
uint64_t handle;
if (!view || !view->resource)
return 0;
memset(desc_list, 0, sizeof(desc_list));
- si_init_descriptor_list(&desc_list[0], 8, 1, null_image_descriptor);
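+ /* Two 8-dword slots now: the image descriptor plus its FMASK descriptor. */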
+ si_init_descriptor_list(&desc_list[0], 8, 2, null_image_descriptor);
- si_set_shader_image_desc(sctx, view, false, &desc_list[0], NULL);
+ si_set_shader_image_desc(sctx, view, false, &desc_list[0], &desc_list[8]);
img_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list,
sizeof(desc_list));
bool is_2nd = sctx->chip_class >= GFX9 &&
(i == PIPE_SHADER_TESS_CTRL ||
i == PIPE_SHADER_GEOMETRY);
- unsigned num_sampler_slots = SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS;
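+ /* Each 16-dword list element holds either one sampler+view or two
+ * 8-dword image slots, so image slots count at half weight here.
+ */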
+ unsigned num_sampler_slots = SI_NUM_IMAGE_SLOTS / 2 + SI_NUM_SAMPLERS;
unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS;
int rel_dw_offset;
struct si_descriptors *desc;
if (i == PIPE_SHADER_TESS_CTRL) {
rel_dw_offset = (R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS -
R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4;
- } else { /* PIPE_SHADER_GEOMETRY */
+ } else if (sctx->chip_class >= GFX10) { /* PIPE_SHADER_GEOMETRY */
+ rel_dw_offset = (R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS -
+ R_00B230_SPI_SHADER_USER_DATA_GS_0) / 4;
+ } else {
rel_dw_offset = (R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS -
R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4;
}
if (i == PIPE_SHADER_TESS_CTRL) {
rel_dw_offset = (R_00B40C_SPI_SHADER_USER_DATA_ADDR_HI_HS -
R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4;
- } else { /* PIPE_SHADER_GEOMETRY */
+ } else if (sctx->chip_class >= GFX10) { /* PIPE_SHADER_GEOMETRY */
+ rel_dw_offset = (R_00B20C_SPI_SHADER_USER_DATA_ADDR_HI_GS -
+ R_00B230_SPI_SHADER_USER_DATA_GS_0) / 4;
+ } else {
rel_dw_offset = (R_00B20C_SPI_SHADER_USER_DATA_ADDR_HI_GS -
R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4;
}
si_init_descriptors(desc, rel_dw_offset, 16, num_sampler_slots);
int j;
- for (j = 0; j < SI_NUM_IMAGES; j++)
+ for (j = 0; j < SI_NUM_IMAGE_SLOTS; j++)
memcpy(desc->list + j * 8, null_image_descriptor, 8 * 4);
- for (; j < SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2; j++)
+ for (; j < SI_NUM_IMAGE_SLOTS + SI_NUM_SAMPLERS * 2; j++)
memcpy(desc->list + j * 8, null_texture_descriptor, 8 * 4);
}
sctx->atoms.s.shader_pointers.emit = si_emit_graphics_shader_pointers;
/* Set default and immutable mappings. */
- si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
+ if (sctx->ngg) {
+ assert(sctx->chip_class >= GFX10);
+ si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B230_SPI_SHADER_USER_DATA_GS_0);
+ } else {
+ si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
+ }
- if (sctx->chip_class >= GFX9) {
+ if (sctx->chip_class == GFX9) {
si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
R_00B430_SPI_SHADER_USER_DATA_LS_0);
si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,