radeonsi: don't use TC L2 for updating descriptors on SI
author: Marek Olšák <marek.olsak@amd.com>
Sun, 4 Jan 2015 21:16:53 +0000 (22:16 +0100)
committer: Marek Olšák <marek.olsak@amd.com>
Wed, 7 Jan 2015 11:06:43 +0000 (12:06 +0100)
Updating descriptors through TC L2 is causing problems, because we mix
uncached CP DMA with cached WRITE_DATA when updating the same memory.

The solution for SI is to use uncached access here, because CP DMA doesn't
support cached access.

CIK will be handled in the next patch.

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/sid.h

index c9599617ede7bfbc49ff259882c4c7ce8dffa9c5..f0e353a1db9eb13136e438404209b080b7441ad6 100644 (file)
@@ -167,8 +167,18 @@ static void si_update_descriptors(struct si_context *sctx,
                        desc->atom.num_dw += 4; /* second pointer update */
 
                desc->atom.dirty = true;
+
+               /* TODO: Investigate if these flushes can be removed after
+                * adding CE support. */
+
                /* The descriptors are read with the K cache. */
                sctx->b.flags |= SI_CONTEXT_INV_KCACHE;
+
+               /* Since SI uses uncached CP DMA to update descriptors,
+                * we have to flush TC L2, which is used to fetch constants
+                * along with KCACHE. */
+               if (sctx->b.chip_class == SI)
+                       sctx->b.flags |= SI_CONTEXT_INV_TC_L2;
        } else {
                desc->atom.dirty = false;
        }
@@ -248,7 +258,9 @@ static void si_emit_descriptors(struct si_context *sctx,
                        packet_size = 2 + desc->element_dw_size;
 
                        radeon_emit(cs, PKT3(PKT3_WRITE_DATA, packet_size, 0));
-                       radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_TC_OR_L2) |
+                       radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(sctx->b.chip_class == SI ?
+                                               PKT3_WRITE_DATA_DST_SEL_MEM_SYNC :
+                                               PKT3_WRITE_DATA_DST_SEL_TC_L2) |
                                             PKT3_WRITE_DATA_WR_CONFIRM |
                                             PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME));
                        radeon_emit(cs, va & 0xFFFFFFFFUL);
index c78ba7c2991c92e536c477c1e8db1ff2da0457d3..e36244dc7d4eefab359fe0bb29aec98ec23332bc 100644 (file)
@@ -98,7 +98,7 @@
 #define     PKT3_WRITE_DATA_DST_SEL(x)             ((x) << 8)
 #define     PKT3_WRITE_DATA_DST_SEL_REG            0
 #define     PKT3_WRITE_DATA_DST_SEL_MEM_SYNC       1
-#define     PKT3_WRITE_DATA_DST_SEL_TC_OR_L2       2
+#define     PKT3_WRITE_DATA_DST_SEL_TC_L2          2
 #define     PKT3_WRITE_DATA_DST_SEL_GDS            3
 #define     PKT3_WRITE_DATA_DST_SEL_RESERVED_4     4
 #define     PKT3_WRITE_DATA_DST_SEL_MEM_ASYNC      5