unsigned old_num_targets = sctx->b.streamout.num_targets;
unsigned i, bufidx;
+ /* We are going to unbind the buffers. Mark which caches need to be flushed. */
+ if (sctx->b.streamout.num_targets && sctx->b.streamout.begin_emitted) {
+ /* Since streamout uses vector writes which go through TC L2
+ * and most other clients can use TC L2 as well, we don't need
+ * to flush it.
+ *
+ * The only case which requires flushing it is VGT DMA index
+ * fetching, which is rare. Thus, flag the TC L2 dirtiness in
+ * the resource and handle it when index fetching is used
+ * (a draw-time sketch follows below).
+ */
+ for (i = 0; i < sctx->b.streamout.num_targets; i++)
+ if (sctx->b.streamout.targets[i])
+ r600_resource(sctx->b.streamout.targets[i]->b.buffer)->TC_L2_dirty = true;
+
+ /* Invalidate the scalar cache in case a streamout buffer is
+ * going to be used as a constant buffer.
+ *
+ * Invalidate TC L1, because streamout bypasses it (by setting
+ * GLC=1 in the store instruction), so it may contain stale
+ * streamout buffer data.
+ *
+ * VS_PARTIAL_FLUSH is required if the buffers are going to be
+ * used as an input immediately.
+ */
+ sctx->b.flags |= SI_CONTEXT_INV_KCACHE |
+ SI_CONTEXT_INV_TC_L1 |
+ SI_CONTEXT_VS_PARTIAL_FLUSH;
+ }
+
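The comment above defers the TC L2 flush to the index-fetch path instead of flushing eagerly here. A minimal sketch of that draw-time counterpart, assuming a draw function with an index-buffer pointer `ib` in scope (the variable name is illustrative; the flag and the `r600_resource()` helper are the ones used in this hunk):

    /* Draw-time counterpart (sketch): if VGT DMA is about to fetch
     * indices from a buffer that streamout wrote through TC L2,
     * invalidate TC L2 once and clear the dirty flag. */
    if (ib->buffer && r600_resource(ib->buffer)->TC_L2_dirty) {
            sctx->b.flags |= SI_CONTEXT_INV_TC_L2;
            r600_resource(ib->buffer)->TC_L2_dirty = false;
    }

Clearing the flag after scheduling the invalidation keeps repeated indexed draws from re-flushing a buffer that streamout has not touched again.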
/* Streamout buffers must be bound in 2 places:
* 1) in VGT by setting the VGT_STRMOUT registers
* 2) as shader resources
#define SI_CONTEXT_FLUSH_AND_INV_DB (R600_CONTEXT_PRIVATE_FLAG << 6)
#define SI_CONTEXT_FLUSH_AND_INV_CB (R600_CONTEXT_PRIVATE_FLAG << 7)
/* Engine synchronization. */
-#define SI_CONTEXT_PS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 8)
-#define SI_CONTEXT_CS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 9)
-#define SI_CONTEXT_VGT_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 10)
-#define SI_CONTEXT_VGT_STREAMOUT_SYNC (R600_CONTEXT_PRIVATE_FLAG << 11)
+#define SI_CONTEXT_VS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 8)
+#define SI_CONTEXT_PS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 9)
+#define SI_CONTEXT_CS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 10)
+#define SI_CONTEXT_VGT_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 11)
+#define SI_CONTEXT_VGT_STREAMOUT_SYNC (R600_CONTEXT_PRIVATE_FLAG << 12)
/* Compute only. */
-#define SI_CONTEXT_FLUSH_WITH_INV_L2 (R600_CONTEXT_PRIVATE_FLAG << 12) /* TODO: merge with TC? */
-#define SI_CONTEXT_FLAG_COMPUTE (R600_CONTEXT_PRIVATE_FLAG << 13)
+#define SI_CONTEXT_FLUSH_WITH_INV_L2 (R600_CONTEXT_PRIVATE_FLAG << 13) /* TODO: merge with TC? */
+#define SI_CONTEXT_FLAG_COMPUTE (R600_CONTEXT_PRIVATE_FLAG << 14)
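Renumbering like this is easy to get wrong: inserting SI_CONTEXT_VS_PARTIAL_FLUSH at bit 8 pushes every later private flag up by one. A hypothetical guard, not part of this patch and assuming a C11 toolchain and a 32-bit context flags word, would catch the flags spilling past the top bit:

    #include <stdint.h>

    /* Hypothetical guard (not in the patch): SI_CONTEXT_FLAG_COMPUTE is
     * the highest private flag, 14 bits above R600_CONTEXT_PRIVATE_FLAG,
     * and must still fit in the 32-bit context flags word. */
    _Static_assert(((uint64_t)R600_CONTEXT_PRIVATE_FLAG << 14) <= UINT32_MAX,
                   "si context flags overflow 32 bits");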
#define SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER (SI_CONTEXT_FLUSH_AND_INV_CB | \
SI_CONTEXT_FLUSH_AND_INV_CB_META | \
cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
}
- if (sctx->flags & (SI_CONTEXT_INV_TC_L1 | R600_CONTEXT_STREAMOUT_FLUSH))
+ if (sctx->flags & SI_CONTEXT_INV_TC_L1)
cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
- if (sctx->flags & (SI_CONTEXT_INV_TC_L2 | R600_CONTEXT_STREAMOUT_FLUSH))
+ if (sctx->flags & SI_CONTEXT_INV_TC_L2)
cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
if (sctx->flags & SI_CONTEXT_PS_PARTIAL_FLUSH) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
- } else if (sctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) {
- /* Needed if streamout buffers are going to be used as a source. */
+ } else if (sctx->flags & SI_CONTEXT_VS_PARTIAL_FLUSH) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
}
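For context, the cp_coher_cntl value assembled above is not emitted by these hunks; in this emit function it is typically consumed by a full-range SURFACE_SYNC packet. A sketch of that consumption, assuming the SI-era packet layout (the literal operand values mirror the usual encoding and should be treated as illustrative):

    /* Sketch: flush/invalidate the caches selected in cp_coher_cntl
     * over the full address range. */
    if (cp_coher_cntl) {
            radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0) | compute);
            radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
            radeon_emit(cs, 0xffffffff);    /* CP_COHER_SIZE: whole address space */
            radeon_emit(cs, 0);             /* CP_COHER_BASE */
            radeon_emit(cs, 0x0000000A);    /* POLL_INTERVAL */
    }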