struct radeon_winsys_cs *cs = sctx->gfx_cs;
uint32_t header = 0, command = 0;
- assert(size);
assert(size <= cp_dma_max_byte_count(sctx));
if (sctx->chip_class >= GFX9)
}
}
+void si_cp_dma_wait_for_idle(struct si_context *sctx)
+{
+ /* Wait for CP DMA idle by issuing a dummy DMA that copies zero bytes.
+ *
+ * The DMA engine will see that there's no work to do and skip this
+ * DMA request; however, the CP will see the sync flag (CP_DMA_SYNC)
+ * and still wait for all DMAs to complete.
+ */
+ si_emit_cp_dma(sctx, 0, 0, 0, CP_DMA_SYNC, SI_COHERENCY_NONE);
+}
+
static unsigned get_flush_flags(struct si_context *sctx, enum si_coherency coher)
{
switch (coher) {
ctx->flags |= SI_CONTEXT_INV_GLOBAL_L2 |
SI_CONTEXT_INV_VMEM_L1;
- si_emit_cache_flush(ctx);
+ /* Make sure CP DMA is idle at the end of IBs after L2 prefetches
+ * because the kernel doesn't wait for it. */
+ if (ctx->chip_class >= CIK)
+ si_cp_dma_wait_for_idle(ctx);
if (ctx->current_saved_cs) {
si_trace_emit(ctx);
SI_COHERENCY_CB_META,
};
+void si_cp_dma_wait_for_idle(struct si_context *sctx);
void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
uint64_t offset, uint64_t size, unsigned value,
enum si_coherency coher);