 void cik_emit_prefetch_L2(struct si_context *sctx)
 {
 	/* Prefetch shaders and VBO descriptors to TC L2. */
-	if (si_pm4_state_enabled_and_changed(sctx, ls))
+	if (sctx->prefetch_L2_mask & SI_PREFETCH_LS)
 		cik_prefetch_shader_async(sctx, sctx->queued.named.ls);
-	if (si_pm4_state_enabled_and_changed(sctx, hs))
+	if (sctx->prefetch_L2_mask & SI_PREFETCH_HS)
 		cik_prefetch_shader_async(sctx, sctx->queued.named.hs);
-	if (si_pm4_state_enabled_and_changed(sctx, es))
+	if (sctx->prefetch_L2_mask & SI_PREFETCH_ES)
 		cik_prefetch_shader_async(sctx, sctx->queued.named.es);
-	if (si_pm4_state_enabled_and_changed(sctx, gs))
+	if (sctx->prefetch_L2_mask & SI_PREFETCH_GS)
 		cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
-	if (si_pm4_state_enabled_and_changed(sctx, vs))
+	if (sctx->prefetch_L2_mask & SI_PREFETCH_VS)
 		cik_prefetch_shader_async(sctx, sctx->queued.named.vs);

 	/* Vertex buffer descriptors are uploaded uncached, so prefetch
 	 * them right after the VS binary. */
-	if (sctx->vertex_buffer_pointer_dirty) {
+	if (sctx->prefetch_L2_mask & SI_PREFETCH_VBO_DESCRIPTORS) {
 		cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b,
 					 sctx->vertex_buffers.buffer_offset,
 					 sctx->vertex_elements->desc_list_byte_size);
 	}

-	if (si_pm4_state_enabled_and_changed(sctx, ps))
+	if (sctx->prefetch_L2_mask & SI_PREFETCH_PS)
 		cik_prefetch_shader_async(sctx, sctx->queued.named.ps);

-	sctx->prefetch_L2 = false;
+	sctx->prefetch_L2_mask = 0;
 }

 void si_init_cp_dma_functions(struct si_context *sctx)
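Every prefetch decision above is now a single bit test against prefetch_L2_mask: the producers of the mask decide when a resource needs refetching, and this function only consumes the bits and clears them. The six shader branches share one shape, which a hypothetical helper (not part of the patch) makes explicit:

    /* Hedged sketch: test one SI_PREFETCH_* bit and, if set, start the
     * async TC L2 prefetch for the matching queued shader state. */
    static inline void
    prefetch_shader_if_needed(struct si_context *sctx, unsigned prefetch_bit,
                              struct si_pm4_state *shader_state)
    {
            if (sctx->prefetch_L2_mask & prefetch_bit)
                    cik_prefetch_shader_async(sctx, shader_state);
    }

With such a helper the body would collapse to calls like prefetch_shader_if_needed(sctx, SI_PREFETCH_LS, sctx->queued.named.ls).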
 	/* Don't flush the const cache. It would have a very negative effect
 	 * on performance (confirmed by testing). New descriptors are always
 	 * uploaded to a fresh new buffer, so I don't think flushing the const
 	 * cache is needed. */
 	si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom);
-	if (sctx->b.chip_class >= CIK)
-		sctx->prefetch_L2 = true;
 	sctx->vertex_buffers_dirty = false;
 	sctx->vertex_buffer_pointer_dirty = true;
+	sctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS;
 	return true;
 }
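Because the chip-generation check moved to the consumer (see the draw-path hunk below), request sites like this one reduce to a bare bit set. A minimal sketch, assuming only the SI_PREFETCH_* bits defined in this patch (the helper name is hypothetical):

    /* Request a TC L2 prefetch of the just-uploaded descriptor list.
     * Harmless on SI: the mask is simply never consumed there. */
    static inline void si_request_vbo_prefetch(struct si_context *sctx)
    {
            sctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS;
    }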
 	if (ctx->ce_ib)
 		si_ce_restore_all_descriptors_at_ib_start(ctx);

-	if (ctx->b.chip_class >= CIK)
-		ctx->prefetch_L2 = true;
+	if (ctx->queued.named.ls)
+		ctx->prefetch_L2_mask |= SI_PREFETCH_LS;
+	if (ctx->queued.named.hs)
+		ctx->prefetch_L2_mask |= SI_PREFETCH_HS;
+	if (ctx->queued.named.es)
+		ctx->prefetch_L2_mask |= SI_PREFETCH_ES;
+	if (ctx->queued.named.gs)
+		ctx->prefetch_L2_mask |= SI_PREFETCH_GS;
+	if (ctx->queued.named.vs)
+		ctx->prefetch_L2_mask |= SI_PREFETCH_VS;
+	if (ctx->queued.named.ps)
+		ctx->prefetch_L2_mask |= SI_PREFETCH_PS;
+	if (ctx->vertex_buffers.buffer)
+		ctx->prefetch_L2_mask |= SI_PREFETCH_VBO_DESCRIPTORS;

 	/* CLEAR_STATE disables all colorbuffers, so only enable bound ones. */
 	ctx->framebuffer.dirty_cbufs =
 		u_bit_consecutive(0, ctx->framebuffer.state.nr_cbufs);
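At the start of a new command stream nothing can be assumed to still sit in TC L2, so where the old code set the single flag unconditionally, the new code re-requests a prefetch for exactly the resources that are actually bound. The same logic can be written table-driven; a hedged sketch (the helper and table are illustrative, ARRAY_SIZE is Mesa's util macro):

    static void si_mark_all_bound_for_prefetch(struct si_context *ctx)
    {
            const struct {
                    void *bound;    /* bound object, or NULL */
                    uint16_t bit;   /* matching SI_PREFETCH_* bit */
            } map[] = {
                    { ctx->queued.named.ls, SI_PREFETCH_LS },
                    { ctx->queued.named.hs, SI_PREFETCH_HS },
                    { ctx->queued.named.es, SI_PREFETCH_ES },
                    { ctx->queued.named.gs, SI_PREFETCH_GS },
                    { ctx->queued.named.vs, SI_PREFETCH_VS },
                    { ctx->queued.named.ps, SI_PREFETCH_PS },
                    { ctx->vertex_buffers.buffer, SI_PREFETCH_VBO_DESCRIPTORS },
            };

            for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
                    if (map[i].bound)
                            ctx->prefetch_L2_mask |= map[i].bit;
    }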
 #define SI_CONTEXT_VGT_FLUSH		(R600_CONTEXT_PRIVATE_FLAG << 12)
 #define SI_CONTEXT_VGT_STREAMOUT_SYNC	(R600_CONTEXT_PRIVATE_FLAG << 13)

+#define SI_PREFETCH_VBO_DESCRIPTORS	(1 << 0)
+#define SI_PREFETCH_LS			(1 << 1)
+#define SI_PREFETCH_HS			(1 << 2)
+#define SI_PREFETCH_ES			(1 << 3)
+#define SI_PREFETCH_GS			(1 << 4)
+#define SI_PREFETCH_VS			(1 << 5)
+#define SI_PREFETCH_PS			(1 << 6)
+
 #define SI_MAX_BORDER_COLORS		4096
 #define SIX_BITS			0x3F
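Bit 0 covers the vertex buffer descriptor list and bits 1 through 6 follow the shader stage order used by the emit function (LS, HS, ES, GS, VS, PS). If a combined constant ever proved useful, it could be derived from the highest bit; a hypothetical addition, not part of the patch:

    /* All currently defined prefetch bits (0x7f). */
    #define SI_PREFETCH_ALL_MASK	((SI_PREFETCH_PS << 1) - 1)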
 	struct u_suballocator		*ce_suballocator;
 	unsigned			ce_ram_saved_offset;
 	uint16_t			total_ce_ram_allocated;
+	uint16_t			prefetch_L2_mask;
 	bool				ce_need_synchronization:1;
 	bool				gfx_flush_in_progress:1;
 	bool				compute_is_busy:1;
-	bool				prefetch_L2:1;

 	/* Atoms (direct states). */
 	union si_state_atoms		atoms;
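The 1-bit flag becomes a 16-bit mask; seven bits are in use, leaving headroom for more prefetchable resources without growing the struct. A compile-time guard along these lines would keep that invariant honest (a sketch, not in the patch; it assumes <stdint.h> is visible here, as si_pipe.h's existing uint16_t fields suggest):

    _Static_assert(SI_PREFETCH_PS <= UINT16_MAX,
                   "SI_PREFETCH_* bits must fit in prefetch_L2_mask");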
 	if (sctx->b.flags)
 		si_emit_cache_flush(sctx);

-	if (sctx->prefetch_L2)
+	if (sctx->b.chip_class >= CIK && sctx->prefetch_L2_mask)
 		cik_emit_prefetch_L2(sctx);

 	/* Emit state atoms. */
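This is now the only place that checks the chip generation: CP DMA prefetching is used on CIK and newer only, so producers may set bits freely and older parts simply never consume them. An empty mask also skips the call outright, so the per-draw cost with nothing to prefetch is a single test. The contract, restated as a hypothetical assertion at the top of the consumer:

    /* cik_emit_prefetch_L2() is only reached through the guard above. */
    assert(sctx->b.chip_class >= CIK && sctx->prefetch_L2_mask);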
 		return false;
 	}

-	if (sctx->b.chip_class >= CIK)
-		sctx->prefetch_L2 = true;
+	if (sctx->b.chip_class >= CIK) {
+		if (si_pm4_state_enabled_and_changed(sctx, ls))
+			sctx->prefetch_L2_mask |= SI_PREFETCH_LS;
+		else if (!sctx->queued.named.ls)
+			sctx->prefetch_L2_mask &= ~SI_PREFETCH_LS;
+
+		if (si_pm4_state_enabled_and_changed(sctx, hs))
+			sctx->prefetch_L2_mask |= SI_PREFETCH_HS;
+		else if (!sctx->queued.named.hs)
+			sctx->prefetch_L2_mask &= ~SI_PREFETCH_HS;
+
+		if (si_pm4_state_enabled_and_changed(sctx, es))
+			sctx->prefetch_L2_mask |= SI_PREFETCH_ES;
+		else if (!sctx->queued.named.es)
+			sctx->prefetch_L2_mask &= ~SI_PREFETCH_ES;
+
+		if (si_pm4_state_enabled_and_changed(sctx, gs))
+			sctx->prefetch_L2_mask |= SI_PREFETCH_GS;
+		else if (!sctx->queued.named.gs)
+			sctx->prefetch_L2_mask &= ~SI_PREFETCH_GS;
+
+		if (si_pm4_state_enabled_and_changed(sctx, vs))
+			sctx->prefetch_L2_mask |= SI_PREFETCH_VS;
+		else if (!sctx->queued.named.vs)
+			sctx->prefetch_L2_mask &= ~SI_PREFETCH_VS;
+
+		if (si_pm4_state_enabled_and_changed(sctx, ps))
+			sctx->prefetch_L2_mask |= SI_PREFETCH_PS;
+		else if (!sctx->queued.named.ps)
+			sctx->prefetch_L2_mask &= ~SI_PREFETCH_PS;
+	}

 	sctx->do_update_shaders = false;
 	return true;
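Each stage follows the same keep/set/clear scheme: set the bit when the pm4 state is enabled and has changed, clear it when the stage is now unbound, and otherwise leave the bit alone so that a prefetch requested earlier (for example by si_begin_new_cs) stays pending. A hypothetical macro version, not in the patch, makes the three cases explicit:

    #define SI_UPDATE_PREFETCH_BIT(sctx, member, bit)                    \
            do {                                                         \
                    if (si_pm4_state_enabled_and_changed(sctx, member))  \
                            (sctx)->prefetch_L2_mask |= (bit);           \
                    else if (!(sctx)->queued.named.member)               \
                            (sctx)->prefetch_L2_mask &= ~(bit);          \
                    /* else: keep any previously requested prefetch */   \
            } while (0)

It would be used as SI_UPDATE_PREFETCH_BIT(sctx, ls, SI_PREFETCH_LS) and so on for the other five stages.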