* the only client not using TC that can change textures is
* the framebuffer.
*
- * Flush all CB and DB caches here because all buffers can be used
- * for write by both TC (with shader image stores) and CB/DB.
+ * Wait for compute shaders because of possible transitions:
+ * - FB write -> shader read
+ * - shader write -> FB read
+ *
+ * DB caches are flushed on demand (using si_decompress_textures).
+ *
+ * When MSAA is enabled, CB and TC caches are flushed on demand
+ * (after FMASK decompression). Shader write -> FB read transitions
+ * cannot happen for MSAA textures, because MSAA shader images are
+ * not supported.
+ *
+ * Only flush and wait for CB if there is actually a bound color buffer.
+ */
+ if (sctx->framebuffer.nr_samples <= 1 &&
+ sctx->framebuffer.state.nr_cbufs) {
+ sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+ SI_CONTEXT_INV_GLOBAL_L2 |
+ SI_CONTEXT_FLUSH_AND_INV_CB;
+ }
+ sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
+
+ /* u_blitter doesn't invoke depth decompression when it does multiple
+ * blits in a row, but the only case when it matters for DB is when
+ * doing generate_mipmap. So here we flush DB manually between
+ * individual generate_mipmap blits.
+ * Note that lower mipmap levels aren't compressed.
*/
- sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
- SI_CONTEXT_INV_GLOBAL_L2 |
- SI_CONTEXT_FLUSH_AND_INV_CB |
- SI_CONTEXT_FLUSH_AND_INV_DB |
- SI_CONTEXT_CS_PARTIAL_FLUSH;
+ if (sctx->generate_mipmap_for_depth) {
+ sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+ SI_CONTEXT_INV_GLOBAL_L2 |
+ SI_CONTEXT_FLUSH_AND_INV_DB;
+ }
/* Take the maximum of the old and new count. If the new count is lower,
* dirtying is needed to disable the unbound colorbuffers.
data_format = V_008F14_IMG_DATA_FORMAT_24_8;
}
+ /* S8 with Z32 HTILE needs a special format. */
+ if (screen->b.chip_class >= GFX9 &&
+ pipe_format == PIPE_FORMAT_S8_UINT &&
+ tex->tc_compatible_htile)
+ data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
+
if (!sampler &&
(res->target == PIPE_TEXTURE_CUBE ||
res->target == PIPE_TEXTURE_CUBE_ARRAY ||
const struct pipe_vertex_element *elements)
{
struct si_screen *sscreen = (struct si_screen*)ctx->screen;
- struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element);
+ struct si_vertex_elements *v = CALLOC_STRUCT(si_vertex_elements);
bool used[SI_NUM_VERTEX_BUFFERS] = {};
int i;
return NULL;
}
- if (elements[i].instance_divisor)
+ if (elements[i].instance_divisor) {
v->uses_instance_divisors = true;
+ v->instance_divisors[i] = elements[i].instance_divisor;
+ }
if (!used[vbo_index]) {
v->first_vb_use_mask |= 1 << i;
memcpy(swizzle, desc->swizzle, sizeof(swizzle));
v->format_size[i] = desc->block.bits / 8;
+ v->src_offset[i] = elements[i].src_offset;
+ v->vertex_buffer_index[i] = vbo_index;
/* The hardware always treats the 2-bit alpha channel as
* unsigned, so a shader workaround is needed. The affected
S_008F0C_NUM_FORMAT(num_format) |
S_008F0C_DATA_FORMAT(data_format);
}
- memcpy(v->elements, elements, sizeof(struct pipe_vertex_element) * count);
-
return v;
}
static void si_bind_vertex_elements(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
- struct si_vertex_element *old = sctx->vertex_elements;
- struct si_vertex_element *v = (struct si_vertex_element*)state;
+ struct si_vertex_elements *old = sctx->vertex_elements;
+ struct si_vertex_elements *v = (struct si_vertex_elements*)state;
sctx->vertex_elements = v;
sctx->vertex_buffers_dirty = true;
{
struct si_context *sctx = (struct si_context *)ctx;
- sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
- SI_CONTEXT_INV_GLOBAL_L2 |
- SI_CONTEXT_FLUSH_AND_INV_CB;
+ /* Multisample surfaces are flushed in si_decompress_textures. */
+ if (sctx->framebuffer.nr_samples <= 1) {
+ sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
+ SI_CONTEXT_INV_GLOBAL_L2 |
+ SI_CONTEXT_FLUSH_AND_INV_CB;
+ }
sctx->framebuffer.do_update_surf_dirtiness = true;
}
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}
- if (flags & PIPE_BARRIER_FRAMEBUFFER)
+ /* MSAA color, any depth and any stencil are flushed in
+ * si_decompress_textures when needed.
+ */
+ if (flags & PIPE_BARRIER_FRAMEBUFFER &&
+ sctx->framebuffer.nr_samples <= 1) {
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
- SI_CONTEXT_FLUSH_AND_INV_DB;
+ SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+ }
- if (flags & (PIPE_BARRIER_FRAMEBUFFER |
- PIPE_BARRIER_INDIRECT_BUFFER))
+ /* Indirect buffers use TC L2 on GFX9, but not older hw. */
+ if (sctx->screen->b.chip_class <= VI &&
+ flags & PIPE_BARRIER_INDIRECT_BUFFER)
sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}