sctx->vertex_elements->desc_list_byte_size);
}
-void cik_emit_prefetch_L2(struct si_context *sctx)
+/**
+ * Prefetch shaders and VBO descriptors.
+ *
+ * \param vertex_stage_only Whether only the API VS and VBO descriptors
+ * should be prefetched.
+ */
+void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only)
{
+ unsigned mask = sctx->prefetch_L2_mask;
+ assert(mask);
+
/* Prefetch shaders and VBO descriptors to TC L2. */
if (sctx->chip_class >= GFX9) {
/* Choose the right spot for the VBO prefetch. */
if (sctx->tes_shader.cso) {
- if (sctx->prefetch_L2_mask & SI_PREFETCH_HS)
+ if (mask & SI_PREFETCH_HS)
cik_prefetch_shader_async(sctx, sctx->queued.named.hs);
- if (sctx->prefetch_L2_mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
cik_prefetch_VBO_descriptors(sctx);
- if (sctx->prefetch_L2_mask & SI_PREFETCH_GS)
+ if (vertex_stage_only) {
+ sctx->prefetch_L2_mask &= ~(SI_PREFETCH_HS |
+ SI_PREFETCH_VBO_DESCRIPTORS);
+ return;
+ }
+
+ if (mask & SI_PREFETCH_GS)
cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
- if (sctx->prefetch_L2_mask & SI_PREFETCH_VS)
+ if (mask & SI_PREFETCH_VS)
cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
} else if (sctx->gs_shader.cso) {
- if (sctx->prefetch_L2_mask & SI_PREFETCH_GS)
+ if (mask & SI_PREFETCH_GS)
cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
- if (sctx->prefetch_L2_mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
cik_prefetch_VBO_descriptors(sctx);
- if (sctx->prefetch_L2_mask & SI_PREFETCH_VS)
+ if (vertex_stage_only) {
+ sctx->prefetch_L2_mask &= ~(SI_PREFETCH_GS |
+ SI_PREFETCH_VBO_DESCRIPTORS);
+ return;
+ }
+
+ if (mask & SI_PREFETCH_VS)
cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
} else {
- if (sctx->prefetch_L2_mask & SI_PREFETCH_VS)
+ if (mask & SI_PREFETCH_VS)
cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
- if (sctx->prefetch_L2_mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
cik_prefetch_VBO_descriptors(sctx);
+ if (vertex_stage_only) {
+ sctx->prefetch_L2_mask &= ~(SI_PREFETCH_VS |
+ SI_PREFETCH_VBO_DESCRIPTORS);
+ return;
+ }
}
} else {
/* SI-CI-VI */
/* Choose the right spot for the VBO prefetch. */
if (sctx->tes_shader.cso) {
- if (sctx->prefetch_L2_mask & SI_PREFETCH_LS)
+ if (mask & SI_PREFETCH_LS)
cik_prefetch_shader_async(sctx, sctx->queued.named.ls);
- if (sctx->prefetch_L2_mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
cik_prefetch_VBO_descriptors(sctx);
- if (sctx->prefetch_L2_mask & SI_PREFETCH_HS)
+ if (vertex_stage_only) {
+ sctx->prefetch_L2_mask &= ~(SI_PREFETCH_LS |
+ SI_PREFETCH_VBO_DESCRIPTORS);
+ return;
+ }
+
+ if (mask & SI_PREFETCH_HS)
cik_prefetch_shader_async(sctx, sctx->queued.named.hs);
- if (sctx->prefetch_L2_mask & SI_PREFETCH_ES)
+ if (mask & SI_PREFETCH_ES)
cik_prefetch_shader_async(sctx, sctx->queued.named.es);
- if (sctx->prefetch_L2_mask & SI_PREFETCH_GS)
+ if (mask & SI_PREFETCH_GS)
cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
- if (sctx->prefetch_L2_mask & SI_PREFETCH_VS)
+ if (mask & SI_PREFETCH_VS)
cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
} else if (sctx->gs_shader.cso) {
- if (sctx->prefetch_L2_mask & SI_PREFETCH_ES)
+ if (mask & SI_PREFETCH_ES)
cik_prefetch_shader_async(sctx, sctx->queued.named.es);
- if (sctx->prefetch_L2_mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
cik_prefetch_VBO_descriptors(sctx);
- if (sctx->prefetch_L2_mask & SI_PREFETCH_GS)
+ if (vertex_stage_only) {
+ sctx->prefetch_L2_mask &= ~(SI_PREFETCH_ES |
+ SI_PREFETCH_VBO_DESCRIPTORS);
+ return;
+ }
+
+ if (mask & SI_PREFETCH_GS)
cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
- if (sctx->prefetch_L2_mask & SI_PREFETCH_VS)
+ if (mask & SI_PREFETCH_VS)
cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
} else {
- if (sctx->prefetch_L2_mask & SI_PREFETCH_VS)
+ if (mask & SI_PREFETCH_VS)
cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
- if (sctx->prefetch_L2_mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
cik_prefetch_VBO_descriptors(sctx);
+ if (vertex_stage_only) {
+ sctx->prefetch_L2_mask &= ~(SI_PREFETCH_VS |
+ SI_PREFETCH_VBO_DESCRIPTORS);
+ return;
+ }
}
}
- if (sctx->prefetch_L2_mask & SI_PREFETCH_PS)
+ if (mask & SI_PREFETCH_PS)
cik_prefetch_shader_async(sctx, sctx->queued.named.ps);
sctx->prefetch_L2_mask = 0;
* in parallel, but starting the draw first is more important.
*/
if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
- cik_emit_prefetch_L2(sctx);
+ cik_emit_prefetch_L2(sctx, false);
} else {
/* If we don't wait for idle, start prefetches first, then set
* states, and draw at the end.
if (sctx->flags)
si_emit_cache_flush(sctx);
+ /* Only prefetch the API VS and VBO descriptors. */
if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
- cik_emit_prefetch_L2(sctx);
+ cik_emit_prefetch_L2(sctx, true);
if (!si_upload_graphics_shader_descriptors(sctx))
return;
si_emit_all_states(sctx, info, 0);
si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
+
+ /* Prefetch the remaining shaders after the draw has been
+ * started. */
+ if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
+ cik_emit_prefetch_L2(sctx, false);
}
if (unlikely(sctx->current_saved_cs)) {