From: Marek Olšák
Date: Tue, 30 Dec 2014 17:41:25 +0000 (+0100)
Subject: radeonsi: emit SURFACE_SYNC last
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=d8185aa9a8e3588fe014faef8afaeae56d45e90b;p=mesa.git

radeonsi: emit SURFACE_SYNC last

This fixes a case where a transform feedback buffer is fed back as an index
buffer, because SURFACE_SYNC must be after VS_PARTIAL_FLUSH.

Reviewed-by: Michel Dänzer
---

diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 3703e5f3d58..cd4880bfd2d 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -369,6 +369,7 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
 {
 	struct radeon_winsys_cs *cs = sctx->rings.gfx.cs;
 	uint32_t cp_coher_cntl = 0;
+	uint32_t sqc_caches = 0;
 	uint32_t compute =
 		PKT3_SHADER_TYPE_S(!!(sctx->flags & SI_CONTEXT_FLAG_COMPUTE));
 
@@ -377,10 +378,9 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
 	if (sctx->chip_class == SI &&
 	    sctx->flags & BOTH_ICACHE_KCACHE &&
 	    (sctx->flags & BOTH_ICACHE_KCACHE) != BOTH_ICACHE_KCACHE) {
-		r600_write_config_reg(cs, R_008C08_SQC_CACHES,
+		sqc_caches =
 			S_008C08_INST_INVALIDATE(!!(sctx->flags & SI_CONTEXT_INV_ICACHE)) |
-			S_008C08_DATA_INVALIDATE(!!(sctx->flags & SI_CONTEXT_INV_KCACHE)));
-		cs->buf[cs->cdw-3] |= compute; /* set the compute bit in the header */
+			S_008C08_DATA_INVALIDATE(!!(sctx->flags & SI_CONTEXT_INV_KCACHE));
 	} else {
 		if (sctx->flags & SI_CONTEXT_INV_ICACHE)
 			cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
@@ -409,24 +409,6 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
 				 S_0085F0_DB_DEST_BASE_ENA(1);
 	}
 
-	if (cp_coher_cntl) {
-		if (sctx->chip_class >= CIK) {
-			radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) | compute);
-			radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
-			radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
-			radeon_emit(cs, 0xff); /* CP_COHER_SIZE_HI */
-			radeon_emit(cs, 0); /* CP_COHER_BASE */
-			radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
-			radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
-		} else {
-			radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0) | compute);
-			radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
-			radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
-			radeon_emit(cs, 0); /* CP_COHER_BASE */
-			radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
-		}
-	}
-
 	if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB_META) {
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
 		radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
@@ -441,6 +423,12 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
 				EVENT_WRITE_INV_L2);
 	}
 
+	/* FLUSH_AND_INV events must be emitted before PS_PARTIAL_FLUSH.
+	 * Otherwise, clearing CMASK (CB meta) with CP DMA isn't reliable.
+	 *
+	 * I think the reason is that FLUSH_AND_INV is only added to a queue
+	 * and it is PS_PARTIAL_FLUSH that waits for it to complete.
+	 */
 	if (sctx->flags & SI_CONTEXT_PS_PARTIAL_FLUSH) {
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
 		radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
@@ -448,12 +436,10 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
 		radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
 	}
-
 	if (sctx->flags & SI_CONTEXT_CS_PARTIAL_FLUSH) {
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
 		radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4)));
 	}
-
 	if (sctx->flags & SI_CONTEXT_VGT_FLUSH) {
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
 		radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
@@ -463,6 +449,32 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
 		radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0));
 	}
 
+	/* SURFACE_SYNC must be emitted after partial flushes.
+	 * It looks like SURFACE_SYNC flushes caches immediately and doesn't
+	 * wait for any engines. This should be last.
+	 */
+	if (sqc_caches) {
+		r600_write_config_reg(cs, R_008C08_SQC_CACHES, sqc_caches);
+		cs->buf[cs->cdw-3] |= compute; /* set the compute bit in the header */
+	}
+	if (cp_coher_cntl) {
+		if (sctx->chip_class >= CIK) {
+			radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) | compute);
+			radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
+			radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
+			radeon_emit(cs, 0xff); /* CP_COHER_SIZE_HI */
+			radeon_emit(cs, 0); /* CP_COHER_BASE */
+			radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
+			radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
+		} else {
+			radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0) | compute);
+			radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
+			radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
+			radeon_emit(cs, 0); /* CP_COHER_BASE */
+			radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
+		}
+	}
+
 	sctx->flags = 0;
 }
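Illustrative note (not part of the patch): the ordering this commit enforces can be shown in a tiny standalone C sketch. Everything below (toy_cs, emit_event, the TOY_* event values) is hypothetical toy code; only the ordering rule, engine waits first and the cache flush last, comes from the change above.

/* Toy model, not mesa code: toy_cs, emit_event and the enum values are made
 * up for illustration.  Only the ordering mirrors si_emit_cache_flush after
 * this patch: partial flushes (engine waits) first, then the cache flush
 * (SURFACE_SYNC / SQC_CACHES) last.
 */
#include <stdint.h>
#include <stdio.h>

struct toy_cs {
	uint32_t buf[64];
	unsigned cdw;			/* current dword count */
};

enum toy_event {
	TOY_VS_PARTIAL_FLUSH = 1,	/* wait for vertex shaders (streamout writes) */
	TOY_PS_PARTIAL_FLUSH,		/* wait for pixel shaders */
	TOY_SURFACE_SYNC,		/* flush/invalidate caches, no engine wait */
};

static void emit_event(struct toy_cs *cs, enum toy_event ev)
{
	cs->buf[cs->cdw++] = (uint32_t)ev;
}

int main(void)
{
	struct toy_cs cs = { .cdw = 0 };

	/* 1) Wait for the engines that produced the data, e.g. the VS that
	 *    wrote a transform feedback buffer. */
	emit_event(&cs, TOY_VS_PARTIAL_FLUSH);

	/* 2) Only then flush/invalidate caches, so a following draw that
	 *    reads the buffer (e.g. as an index buffer) sees finished data.
	 *    Emitting the flush first, as the old code did, could run it
	 *    before the streamout writes have completed. */
	emit_event(&cs, TOY_SURFACE_SYNC);

	for (unsigned i = 0; i < cs.cdw; i++)
		printf("dw %u: event %u\n", i, (unsigned)cs.buf[i]);
	return 0;
}

The sketch only captures packet ordering in a command stream; the real function additionally chooses between SURFACE_SYNC, ACQUIRE_MEM and SQC_CACHES depending on chip class, as the diff shows.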