{
struct radeon_winsys_cs *cs = sctx->rings.gfx.cs;
uint32_t cp_coher_cntl = 0;
+ uint32_t sqc_caches = 0;
uint32_t compute =
PKT3_SHADER_TYPE_S(!!(sctx->flags & SI_CONTEXT_FLAG_COMPUTE));
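+	/* Hardware quirk on SI: setting either the ICACHE or the KCACHE bit
+	 * in CP_COHER_CNTL invalidates both caches, so when only one of the
+	 * two is requested, invalidate it separately via SQC_CACHES. */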
if (sctx->chip_class == SI &&
sctx->flags & BOTH_ICACHE_KCACHE &&
(sctx->flags & BOTH_ICACHE_KCACHE) != BOTH_ICACHE_KCACHE) {
- r600_write_config_reg(cs, R_008C08_SQC_CACHES,
+ sqc_caches =
S_008C08_INST_INVALIDATE(!!(sctx->flags & SI_CONTEXT_INV_ICACHE)) |
- S_008C08_DATA_INVALIDATE(!!(sctx->flags & SI_CONTEXT_INV_KCACHE)));
- cs->buf[cs->cdw-3] |= compute; /* set the compute bit in the header */
+ S_008C08_DATA_INVALIDATE(!!(sctx->flags & SI_CONTEXT_INV_KCACHE));
} else {
if (sctx->flags & SI_CONTEXT_INV_ICACHE)
cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
		if (sctx->flags & SI_CONTEXT_INV_KCACHE)
			cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
}
- if (cp_coher_cntl) {
- if (sctx->chip_class >= CIK) {
- radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) | compute);
- radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
- radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
- radeon_emit(cs, 0xff); /* CP_COHER_SIZE_HI */
- radeon_emit(cs, 0); /* CP_COHER_BASE */
- radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
- radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
- } else {
- radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0) | compute);
- radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
- radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
- radeon_emit(cs, 0); /* CP_COHER_BASE */
- radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
- }
- }
-
if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB_META) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
}
+ /* FLUSH_AND_INV events must be emitted before PS_PARTIAL_FLUSH.
+ * Otherwise, clearing CMASK (CB meta) with CP DMA isn't reliable.
+ *
+ * I think the reason is that FLUSH_AND_INV is only added to a queue
+ * and it is PS_PARTIAL_FLUSH that waits for it to complete.
+ */
if (sctx->flags & SI_CONTEXT_PS_PARTIAL_FLUSH) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
}
-
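+	/* CS_PARTIAL_FLUSH waits for compute shaders to finish;
+	 * EVENT_INDEX(4) denotes a *_PARTIAL_FLUSH event. */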
if (sctx->flags & SI_CONTEXT_CS_PARTIAL_FLUSH) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
		radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
}
-
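+	/* VGT_FLUSH and VGT_STREAMOUT_SYNC are distinct events controlled by
+	 * separate flags; each EVENT_WRITE needs its own PKT3 header. */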
	if (sctx->flags & SI_CONTEXT_VGT_FLUSH) {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
		radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
	}
	if (sctx->flags & SI_CONTEXT_VGT_STREAMOUT_SYNC) {
		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute);
		radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0));
	}
+ /* SURFACE_SYNC must be emitted after partial flushes.
+ * It looks like SURFACE_SYNC flushes caches immediately and doesn't
+ * wait for any engines. This should be last.
+ */
+ if (sqc_caches) {
+ r600_write_config_reg(cs, R_008C08_SQC_CACHES, sqc_caches);
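+		/* r600_write_config_reg emits a 3-dword SET_CONFIG_REG packet
+		 * (header, register offset, value), so cdw-3 points at the
+		 * PKT3 header, where the shader type (compute) bit lives. */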
+ cs->buf[cs->cdw-3] |= compute; /* set the compute bit in the header */
+ }
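+	/* Flush caches over the full address range: CP_COHER_SIZE is set to
+	 * the maximum. CIK uses ACQUIRE_MEM, which extends SURFACE_SYNC with
+	 * high address bits (CP_COHER_SIZE_HI, CP_COHER_BASE_HI). */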
+ if (cp_coher_cntl) {
+ if (sctx->chip_class >= CIK) {
+ radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) | compute);
+ radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
+ radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
+ radeon_emit(cs, 0xff); /* CP_COHER_SIZE_HI */
+ radeon_emit(cs, 0); /* CP_COHER_BASE */
+ radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
+ radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
+ } else {
+ radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0) | compute);
+ radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
+ radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
+ radeon_emit(cs, 0); /* CP_COHER_BASE */
+ radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
+ }
+ }
+
sctx->flags = 0;
}