+
+ /* If it's not a prefetch or GDS copy... */
+ if (dst && src && (dst != src || dst_offset != src_offset))
+ sctx->num_cp_dma_calls++;
+}
+
+void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf,
+ uint64_t offset, unsigned size)
+{
+ assert(sctx->chip_class >= CIK);
+
+ si_cp_dma_copy_buffer(sctx, buf, buf, offset, offset, size,
+ SI_CPDMA_SKIP_ALL, SI_COHERENCY_SHADER, L2_LRU);
+}
+
+static void cik_prefetch_shader_async(struct si_context *sctx,
+ struct si_pm4_state *state)
+{
+ struct pipe_resource *bo = &state->bo[0]->b.b;
+ assert(state->nbo == 1);
+
+ cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0);
+}
+
+static void cik_prefetch_VBO_descriptors(struct si_context *sctx)
+{
+ if (!sctx->vertex_elements || !sctx->vertex_elements->desc_list_byte_size)
+ return;
+
+ cik_prefetch_TC_L2_async(sctx, &sctx->vb_descriptors_buffer->b.b,
+ sctx->vb_descriptors_offset,
+ sctx->vertex_elements->desc_list_byte_size);
+}
+
+/**
+ * Prefetch shaders and VBO descriptors.
+ *
+ * \param vertex_stage_only Whether only the the API VS and VBO descriptors
+ * should be prefetched.
+ */
+void cik_emit_prefetch_L2(struct si_context *sctx, bool vertex_stage_only)
+{
+ unsigned mask = sctx->prefetch_L2_mask;
+ assert(mask);
+
+ /* Prefetch shaders and VBO descriptors to TC L2. */
+ if (sctx->chip_class >= GFX9) {
+ /* Choose the right spot for the VBO prefetch. */
+ if (sctx->tes_shader.cso) {
+ if (mask & SI_PREFETCH_HS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.hs);
+ if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ cik_prefetch_VBO_descriptors(sctx);
+ if (vertex_stage_only) {
+ sctx->prefetch_L2_mask &= ~(SI_PREFETCH_HS |
+ SI_PREFETCH_VBO_DESCRIPTORS);
+ return;
+ }
+
+ if (mask & SI_PREFETCH_GS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
+ if (mask & SI_PREFETCH_VS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
+ } else if (sctx->gs_shader.cso) {
+ if (mask & SI_PREFETCH_GS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
+ if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ cik_prefetch_VBO_descriptors(sctx);
+ if (vertex_stage_only) {
+ sctx->prefetch_L2_mask &= ~(SI_PREFETCH_GS |
+ SI_PREFETCH_VBO_DESCRIPTORS);
+ return;
+ }
+
+ if (mask & SI_PREFETCH_VS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
+ } else {
+ if (mask & SI_PREFETCH_VS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
+ if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ cik_prefetch_VBO_descriptors(sctx);
+ if (vertex_stage_only) {
+ sctx->prefetch_L2_mask &= ~(SI_PREFETCH_VS |
+ SI_PREFETCH_VBO_DESCRIPTORS);
+ return;
+ }
+ }
+ } else {
+ /* SI-CI-VI */
+ /* Choose the right spot for the VBO prefetch. */
+ if (sctx->tes_shader.cso) {
+ if (mask & SI_PREFETCH_LS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.ls);
+ if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ cik_prefetch_VBO_descriptors(sctx);
+ if (vertex_stage_only) {
+ sctx->prefetch_L2_mask &= ~(SI_PREFETCH_LS |
+ SI_PREFETCH_VBO_DESCRIPTORS);
+ return;
+ }
+
+ if (mask & SI_PREFETCH_HS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.hs);
+ if (mask & SI_PREFETCH_ES)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.es);
+ if (mask & SI_PREFETCH_GS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
+ if (mask & SI_PREFETCH_VS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
+ } else if (sctx->gs_shader.cso) {
+ if (mask & SI_PREFETCH_ES)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.es);
+ if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ cik_prefetch_VBO_descriptors(sctx);
+ if (vertex_stage_only) {
+ sctx->prefetch_L2_mask &= ~(SI_PREFETCH_ES |
+ SI_PREFETCH_VBO_DESCRIPTORS);
+ return;
+ }
+
+ if (mask & SI_PREFETCH_GS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.gs);
+ if (mask & SI_PREFETCH_VS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
+ } else {
+ if (mask & SI_PREFETCH_VS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.vs);
+ if (mask & SI_PREFETCH_VBO_DESCRIPTORS)
+ cik_prefetch_VBO_descriptors(sctx);
+ if (vertex_stage_only) {
+ sctx->prefetch_L2_mask &= ~(SI_PREFETCH_VS |
+ SI_PREFETCH_VBO_DESCRIPTORS);
+ return;
+ }
+ }
+ }
+
+ if (mask & SI_PREFETCH_PS)
+ cik_prefetch_shader_async(sctx, sctx->queued.named.ps);
+
+ sctx->prefetch_L2_mask = 0;