* the unsynchronized map flag and expect the driver to figure it out.
*/
struct util_range valid_buffer_range;
+
+ /* For buffers only. This indicates that a write operation has been
+ * performed by TC L2, but the cache hasn't been flushed.
+ * Any hw block which doesn't use or bypasses TC L2 should check this
+ * flag and flush the cache before using the buffer.
+ *
+ * For example, TC L2 must be flushed if a buffer which has been
+ * modified by a shader store instruction is about to be used as
+ * an index buffer. The reason is that VGT DMA index fetching doesn't
+ * use TC L2.
+ */
+ bool TC_L2_dirty;
};
struct r600_transfer {
bool is_framebuffer)
{
struct si_context *sctx = (struct si_context*)ctx;
- unsigned flush_flags;
+ unsigned flush_flags, tc_l2_flag;
if (!size)
return;
uint64_t va = r600_resource(dst)->gpu_address + offset;
/* Flush the caches where the resource is bound. */
- if (is_framebuffer)
+ if (is_framebuffer) {
flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
- else
+ tc_l2_flag = 0;
+ } else {
flush_flags = SI_CONTEXT_INV_TC_L1 |
- SI_CONTEXT_INV_TC_L2 |
+ (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0) |
SI_CONTEXT_INV_KCACHE;
+ tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
+ }
sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
flush_flags;
while (size) {
unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
- unsigned dma_flags = 0;
+ unsigned dma_flags = tc_l2_flag;
si_need_cs_space(sctx, 7 + (sctx->b.flags ? sctx->cache_flush.num_dw : 0),
FALSE);
/* Flush the caches again in case the 3D engine has been prefetching
* the resource. */
sctx->b.flags |= flush_flags;
+
+ if (tc_l2_flag)
+ r600_resource(dst)->TC_L2_dirty = true;
}
void si_copy_buffer(struct si_context *sctx,
uint64_t dst_offset, uint64_t src_offset, unsigned size,
bool is_framebuffer)
{
- unsigned flush_flags;
+ unsigned flush_flags, tc_l2_flag;
if (!size)
return;
src_offset += r600_resource(src)->gpu_address;
/* Flush the caches where the resource is bound. */
- if (is_framebuffer)
+ if (is_framebuffer) {
flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
- else
+ tc_l2_flag = 0;
+ } else {
flush_flags = SI_CONTEXT_INV_TC_L1 |
- SI_CONTEXT_INV_TC_L2 |
+ (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0) |
SI_CONTEXT_INV_KCACHE;
+ tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
+ }
sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
flush_flags;
while (size) {
- unsigned sync_flags = 0;
+ unsigned sync_flags = tc_l2_flag;
unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
si_need_cs_space(sctx, 7 + (sctx->b.flags ? sctx->cache_flush.num_dw : 0), FALSE);
/* Flush the caches again in case the 3D engine has been prefetching
* the resource. */
sctx->b.flags |= flush_flags;
+
+ if (tc_l2_flag)
+ r600_resource(dst)->TC_L2_dirty = true;
}
/* INIT/DEINIT */