va += base_level_info->offset;
}
- if (tex->dcc_offset && first_level < tex->surface.num_dcc_levels) {
- meta_va = (!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
- tex->dcc_offset;
-
- if (sscreen->b.chip_class <= VI)
- meta_va += base_level_info->dcc_offset;
- } else if (tex->tc_compatible_htile && !is_stencil) {
- meta_va = tex->htile_buffer->gpu_address;
- }
-
state[0] = va >> 8;
state[1] &= C_008F14_BASE_ADDRESS_HI;
state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
- state[6] &= C_008F28_COMPRESSION_EN;
- state[7] = 0;
+ if (sscreen->b.chip_class >= VI) {
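+ /* Only VI and later have the COMPRESSION_EN bit and the metadata
+ * address in words 6-7 of the image descriptor.
+ */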
+ state[6] &= C_008F28_COMPRESSION_EN;
+ state[7] = 0;
+
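+ /* Pick the compression metadata address: DCC for color, or
+ * TC-compatible HTILE for depth, so the hardware can read the
+ * compressed surface directly.
+ */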
+ if (vi_dcc_enabled(tex, first_level)) {
+ meta_va = (!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
+ tex->dcc_offset;
+
+ if (sscreen->b.chip_class <= VI)
+ meta_va += base_level_info->dcc_offset;
+ } else if (tex->tc_compatible_htile && !is_stencil) {
+ meta_va = tex->htile_buffer->gpu_address;
+ }
- if (meta_va) {
- state[6] |= S_008F28_COMPRESSION_EN(1);
- state[7] = meta_va >> 8;
+ if (meta_va) {
+ state[6] |= S_008F28_COMPRESSION_EN(1);
+ state[7] = meta_va >> 8;
+ }
}
if (sscreen->b.chip_class >= GFX9) {
if (view) {
struct r600_texture *rtex = (struct r600_texture *)view->texture;
+ bool is_buffer = rtex->resource.b.b.target == PIPE_BUFFER;
+
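+ /* This view is incompatible with DCC, so disable DCC; if that
+ * fails, decompress instead so reads through this view stay
+ * correct.
+ */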
+ if (unlikely(!is_buffer && rview->dcc_incompatible)) {
+ if (vi_dcc_enabled(rtex, view->u.tex.first_level))
+ if (!r600_texture_disable_dcc(&sctx->b, rtex))
+ sctx->b.decompress_dcc(&sctx->b.b, rtex);
+
+ rview->dcc_incompatible = false;
+ }
assert(rtex); /* views with texture == NULL aren't supported */
pipe_sampler_view_reference(&views->views[slot], view);
memcpy(desc, rview->state, 8*4);
- if (rtex->resource.b.b.target == PIPE_BUFFER) {
+ if (is_buffer) {
rtex->resource.bind_history |= PIPE_BIND_SAMPLER_VIEW;
si_set_buf_desc_address(&rtex->resource,
			view->u.buf.offset, desc + 4);
}
- if (rtex->resource.b.b.target != PIPE_BUFFER &&
- rtex->fmask.size) {
+ if (!is_buffer && rtex->fmask.size) {
memcpy(desc + 8,
rview->fmask_state, 8*4);
} else {
static bool is_compressed_colortex(struct r600_texture *rtex)
{
- return rtex->cmask.size || rtex->fmask.size ||
- (rtex->dcc_offset && rtex->dirty_level_mask);
+ return rtex->fmask.size ||
+ (rtex->dirty_level_mask &&
+ (rtex->cmask.size || rtex->dcc_offset));
+}
+
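+/* With TC-compatible HTILE, depth can be sampled directly; stencil
+ * sampling (and any non-TC-compatible HTILE) still requires a
+ * decompress pass first.
+ */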
+static bool depth_needs_decompression(struct r600_texture *rtex,
+ struct si_sampler_view *sview)
+{
+ return rtex->db_compatible &&
+ (!rtex->tc_compatible_htile || sview->is_stencil_sampler);
}
static void si_update_compressed_tex_shader_mask(struct si_context *sctx,
struct r600_texture *rtex = (struct r600_texture*)views[i]->texture;
struct si_sampler_view *rview = (struct si_sampler_view *)views[i];
- if (rtex->db_compatible &&
- (!rtex->tc_compatible_htile || rview->is_stencil_sampler)) {
+ if (depth_needs_decompression(rtex, rview)) {
samplers->depth_texture_mask |= 1u << slot;
} else {
samplers->depth_texture_mask &= ~(1u << slot);
si_make_buffer_descriptor(screen, res,
view->format,
view->u.buf.offset,
- view->u.buf.size,
- descs->list + slot * 8);
+ view->u.buf.size, desc);
si_set_buf_desc_address(res, view->u.buf.offset, desc + 4);
images->compressed_colortex_mask &= ~(1 << slot);
static const unsigned char swizzle[4] = { 0, 1, 2, 3 };
struct r600_texture *tex = (struct r600_texture *)res;
unsigned level = view->u.tex.level;
- unsigned width, height, depth;
- bool uses_dcc = tex->dcc_offset &&
- level < tex->surface.num_dcc_levels;
+ unsigned width, height, depth, hw_level;
+ bool uses_dcc = vi_dcc_enabled(tex, level);
assert(!tex->is_depth);
assert(tex->fmask.size == 0);
p_atomic_read(&tex->framebuffers_bound))
ctx->need_check_render_feedback = true;
- /* Always force the base level to the selected level.
- *
- * This is required for 3D textures, where otherwise
- * selecting a single slice for non-layered bindings
- * fails. It doesn't hurt the other targets.
- */
- width = u_minify(res->b.b.width0, level);
- height = u_minify(res->b.b.height0, level);
- depth = u_minify(res->b.b.depth0, level);
+ if (ctx->b.chip_class >= GFX9) {
+ /* Always set the base address. The swizzle modes don't
+ * allow setting mipmap level offsets as the base.
+ */
+ width = res->b.b.width0;
+ height = res->b.b.height0;
+ depth = res->b.b.depth0;
+ hw_level = level;
+ } else {
+ /* Always force the base level to the selected level.
+ *
+ * This is required for 3D textures, where otherwise
+ * selecting a single slice for non-layered bindings
+ * fails. It doesn't hurt the other targets.
+ */
+ width = u_minify(res->b.b.width0, level);
+ height = u_minify(res->b.b.height0, level);
+ depth = u_minify(res->b.b.depth0, level);
+ hw_level = 0;
+ }
si_make_texture_descriptor(screen, tex,
false, res->b.b.target,
view->format, swizzle,
- 0, 0,
+ hw_level, hw_level,
view->u.tex.first_layer,
view->u.tex.last_layer,
width, height, depth,
if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
continue;
- if (!sctx->vertex_buffer[vb].buffer)
+ if (!sctx->vertex_buffer[vb].buffer.resource)
continue;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
- (struct r600_resource*)sctx->vertex_buffer[vb].buffer,
+ (struct r600_resource*)sctx->vertex_buffer[vb].buffer.resource,
RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
}
uint32_t *desc = &ptr[i*4];
vb = &sctx->vertex_buffer[vbo_index];
- rbuffer = (struct r600_resource*)vb->buffer;
+ rbuffer = (struct r600_resource*)vb->buffer.resource;
if (!rbuffer) {
memset(desc, 0, 16);
continue;
if (sctx->b.chip_class != VI && vb->stride) {
/* Round up by rounding down and adding 1 */
- desc[2] = (vb->buffer->width0 - offset -
+ desc[2] = (vb->buffer.resource->width0 - offset -
velems->format_size[i]) /
vb->stride + 1;
} else {
- desc[2] = vb->buffer->width0 - offset;
+ desc[2] = vb->buffer.resource->width0 - offset;
}
desc[3] = velems->rsrc_word3[i];
if (first_vb_use_mask & (1 << i)) {
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
- (struct r600_resource*)vb->buffer,
+ (struct r600_resource*)vb->buffer.resource,
RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
}
}
}
}
-/* Reallocate a buffer a update all resource bindings where the buffer is
- * bound.
- *
- * This is used to avoid CPU-GPU synchronizations, because it makes the buffer
- * idle by discarding its contents. Apps usually tell us when to do this using
- * map_buffer flags, for example.
- */
-static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf)
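+/* Bind the buffer's new GPU address everywhere its old address
+ * (old_va) was bound.
+ */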
+static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf,
+ uint64_t old_va)
{
struct si_context *sctx = (struct si_context*)ctx;
struct r600_resource *rbuffer = r600_resource(buf);
unsigned i, shader;
- uint64_t old_va = rbuffer->gpu_address;
unsigned num_elems = sctx->vertex_elements ?
sctx->vertex_elements->count : 0;
- /* Reallocate the buffer in the same pipe_resource. */
- r600_alloc_resource(&sctx->screen->b, rbuffer);
-
/* We changed the buffer, now we need to bind it where the old one
* was bound. This consists of 2 things:
* 1) Updating the resource descriptor and dirtying it.
if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
continue;
- if (!sctx->vertex_buffer[vb].buffer)
+ if (!sctx->vertex_buffer[vb].buffer.resource)
continue;
- if (sctx->vertex_buffer[vb].buffer == buf) {
+ if (sctx->vertex_buffer[vb].buffer.resource == buf) {
sctx->vertex_buffers_dirty = true;
break;
}
}
}
+/* Reallocate a buffer and update all resource bindings where the buffer is
+ * bound.
+ *
+ * This is used to avoid CPU-GPU synchronizations, because it makes the buffer
+ * idle by discarding its contents. Apps usually tell us when to do this using
+ * map_buffer flags, for example.
+ */
+static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource *buf)
+{
+ struct si_context *sctx = (struct si_context*)ctx;
+ struct r600_resource *rbuffer = r600_resource(buf);
+ uint64_t old_va = rbuffer->gpu_address;
+
+ /* Reallocate the buffer in the same pipe_resource. */
+ r600_alloc_resource(&sctx->screen->b, rbuffer);
+
+ si_rebind_buffer(ctx, buf, old_va);
+}
+
/* Update mutable image descriptor fields of all bound textures. */
void si_update_all_texture_descriptors(struct si_context *sctx)
{
if (*base != new_base) {
*base = new_base;
- if (new_base)
+ if (new_base) {
si_mark_shader_pointers_dirty(sctx, shader);
+
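+ /* The VS state user SGPR moves with the base; invalidate the
+ * cached value so it gets re-emitted at the new location.
+ */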
+ if (shader == PIPE_SHADER_VERTEX)
+ sctx->last_vs_state = ~0;
+ }
}
}
void si_shader_change_notify(struct si_context *sctx)
{
/* VS can be bound as VS, ES, or LS. */
- if (sctx->tes_shader.cso)
- si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
- R_00B530_SPI_SHADER_USER_DATA_LS_0);
- else if (sctx->gs_shader.cso)
+ if (sctx->tes_shader.cso) {
+ if (sctx->b.chip_class >= GFX9) {
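+ /* GFX9 merges LS into HS, so the LS user data registers
+ * live at the old HS offset (0xB430).
+ */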
+ si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
+ R_00B430_SPI_SHADER_USER_DATA_LS_0);
+ } else {
+ si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
+ R_00B530_SPI_SHADER_USER_DATA_LS_0);
+ }
+ } else if (sctx->gs_shader.cso) {
si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
R_00B330_SPI_SHADER_USER_DATA_ES_0);
- else
+ } else {
si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
R_00B130_SPI_SHADER_USER_DATA_VS_0);
+ }
/* TES can be bound as ES, VS, or not bound. */
if (sctx->tes_shader.cso) {
R_00B030_SPI_SHADER_USER_DATA_PS_0);
si_emit_shader_pointer(sctx, descs,
R_00B130_SPI_SHADER_USER_DATA_VS_0);
- si_emit_shader_pointer(sctx, descs,
- R_00B230_SPI_SHADER_USER_DATA_GS_0);
- si_emit_shader_pointer(sctx, descs,
- R_00B330_SPI_SHADER_USER_DATA_ES_0);
- si_emit_shader_pointer(sctx, descs,
- R_00B430_SPI_SHADER_USER_DATA_HS_0);
+
+ if (sctx->b.chip_class >= GFX9) {
+ /* GFX9 merged LS-HS and ES-GS.
+ * Set RW_BUFFERS in the special registers, so that
+ * it's preloaded into s[0:1] instead of s[8:9].
+ */
+ si_emit_shader_pointer(sctx, descs,
+ R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS);
+ si_emit_shader_pointer(sctx, descs,
+ R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS);
+ } else {
+ si_emit_shader_pointer(sctx, descs,
+ R_00B230_SPI_SHADER_USER_DATA_GS_0);
+ si_emit_shader_pointer(sctx, descs,
+ R_00B330_SPI_SHADER_USER_DATA_ES_0);
+ si_emit_shader_pointer(sctx, descs,
+ R_00B430_SPI_SHADER_USER_DATA_HS_0);
+ }
}
mask = sctx->shader_pointers_dirty &
int i;
unsigned ce_offset = 0;
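+
+ /* Descriptor pointers take two user SGPRs each; the even-slot
+ * checks below presumably keep those 64-bit pairs aligned on GFX9.
+ */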
+ STATIC_ASSERT(GFX9_SGPR_TCS_CONST_BUFFERS % 2 == 0);
+ STATIC_ASSERT(GFX9_SGPR_GS_CONST_BUFFERS % 2 == 0);
+
for (i = 0; i < SI_NUM_SHADERS; i++) {
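+ /* GFX9 runs TCS merged with LS and GS merged with ES, so their
+ * descriptors live at dedicated GFX9 user SGPR slots.
+ */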
+ bool gfx9_tcs = sctx->b.chip_class == GFX9 &&
+ i == PIPE_SHADER_TESS_CTRL;
+ bool gfx9_gs = sctx->b.chip_class == GFX9 &&
+ i == PIPE_SHADER_GEOMETRY;
/* GFX9 has only 4KB of CE, while previous chips had 32KB.
* Rarely used descriptors don't use CE RAM.
*/
si_init_buffer_resources(&sctx->const_buffers[i],
si_const_buffer_descriptors(sctx, i),
- SI_NUM_CONST_BUFFERS, SI_SGPR_CONST_BUFFERS,
+ SI_NUM_CONST_BUFFERS,
+ gfx9_tcs ? GFX9_SGPR_TCS_CONST_BUFFERS :
+ gfx9_gs ? GFX9_SGPR_GS_CONST_BUFFERS :
+ SI_SGPR_CONST_BUFFERS,
RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER,
&ce_offset);
si_init_buffer_resources(&sctx->shader_buffers[i],
si_shader_buffer_descriptors(sctx, i),
- SI_NUM_SHADER_BUFFERS, SI_SGPR_SHADER_BUFFERS,
+ SI_NUM_SHADER_BUFFERS,
+ gfx9_tcs ? GFX9_SGPR_TCS_SHADER_BUFFERS :
+ gfx9_gs ? GFX9_SGPR_GS_SHADER_BUFFERS :
+ SI_SGPR_SHADER_BUFFERS,
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER,
shaderbufs_use_ce ? &ce_offset : NULL);
si_init_descriptors(si_sampler_descriptors(sctx, i),
- SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS,
+ gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS :
+ gfx9_gs ? GFX9_SGPR_GS_SAMPLERS :
+ SI_SGPR_SAMPLERS,
+ 16, SI_NUM_SAMPLERS,
null_texture_descriptor,
samplers_use_ce ? &ce_offset : NULL);
si_init_descriptors(si_image_descriptors(sctx, i),
- SI_SGPR_IMAGES, 8, SI_NUM_IMAGES,
+ gfx9_tcs ? GFX9_SGPR_TCS_IMAGES :
+ gfx9_gs ? GFX9_SGPR_GS_IMAGES :
+ SI_SGPR_IMAGES,
+ 8, SI_NUM_IMAGES,
null_image_descriptor,
images_use_ce ? &ce_offset : NULL);
}
sctx->b.b.set_sampler_views = si_set_sampler_views;
sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
sctx->b.invalidate_buffer = si_invalidate_buffer;
+ sctx->b.rebind_buffer = si_rebind_buffer;
/* Shader user data. */
si_init_atom(sctx, &sctx->shader_userdata.atom, &sctx->atoms.s.shader_userdata,
/* Set default and immutable mappings. */
si_set_user_data_base(sctx, PIPE_SHADER_VERTEX, R_00B130_SPI_SHADER_USER_DATA_VS_0);
- si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL, R_00B430_SPI_SHADER_USER_DATA_HS_0);
- si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY, R_00B230_SPI_SHADER_USER_DATA_GS_0);
+
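+ /* The GFX9 merged stages (LS-HS, ES-GS) use the LS and ES
+ * user data registers.
+ */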
+ if (sctx->b.chip_class >= GFX9) {
+ si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
+ R_00B430_SPI_SHADER_USER_DATA_LS_0);
+ si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
+ R_00B330_SPI_SHADER_USER_DATA_ES_0);
+ } else {
+ si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
+ R_00B430_SPI_SHADER_USER_DATA_HS_0);
+ si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
+ R_00B230_SPI_SHADER_USER_DATA_GS_0);
+ }
si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
}