radv: Add L2 writeback.
author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Mon, 6 Mar 2017 00:28:53 +0000 (01:28 +0100)
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Mon, 6 Mar 2017 08:15:51 +0000 (09:15 +0100)
Signed-off-by: Bas Nieuwenhuizen <basni@google.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
src/amd/vulkan/radv_private.h
src/amd/vulkan/si_cmd_buffer.c

index 0e629fcf836e0de02ecbba50331f8a936b048185..30201a67e73338db385031eba6091837f6bdf805 100644 (file)
@@ -587,16 +587,18 @@ enum radv_cmd_flush_bits {
        RADV_CMD_FLAG_INV_VMEM_L1 = 1 << 2,
        /* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */
        RADV_CMD_FLAG_INV_GLOBAL_L2 = 1 << 3,
+       /* Same as above, but only writes back and doesn't invalidate */
+       RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 = 1 << 4,
        /* Framebuffer caches */
-       RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 4,
-       RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 5,
-       RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 6,
-       RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 7,
+       RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5,
+       RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6,
+       RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7,
+       RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8,
        /* Engine synchronization. */
-       RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 8,
-       RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 9,
-       RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 10,
-       RADV_CMD_FLAG_VGT_FLUSH        = 1 << 11,
+       RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9,
+       RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10,
+       RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11,
+       RADV_CMD_FLAG_VGT_FLUSH        = 1 << 12,
 
        RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
                                              RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
index 4709ef69a027b0ebb523e6382138243565754806..5d35287f8e37e59cd84d4a02165bfd79b26f07f3 100644 (file)
@@ -689,6 +689,30 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
 
 }
 
+static void
+si_emit_acquire_mem(struct radeon_winsys_cs *cs,   /* stream handed to every radeon_emit() call below */
+                    bool is_mec,                    /* true: ACQUIRE_MEM packet; false: SURFACE_SYNC packet */
+                    unsigned cp_coher_cntl)         /* emitted unchanged as the CP_COHER_CNTL dword */
+{      /* Emits exactly one coherency packet into cs; is_mec selects which packet type. */
+       if (is_mec) {   /* ACQUIRE_MEM form: header + 6 payload dwords */
+               radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) |
+                                           PKT3_SHADER_TYPE_S(1));
+               radeon_emit(cs, cp_coher_cntl);   /* CP_COHER_CNTL */
+               radeon_emit(cs, 0xffffffff);      /* CP_COHER_SIZE */
+               radeon_emit(cs, 0xff);            /* CP_COHER_SIZE_HI */
+               radeon_emit(cs, 0);               /* CP_COHER_BASE */
+               radeon_emit(cs, 0);               /* CP_COHER_BASE_HI */
+               radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL */
+       } else {
+               /* ACQUIRE_MEM is only required on a compute ring; otherwise the shorter SURFACE_SYNC (header + 4 payload dwords) is emitted. */
+               radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
+               radeon_emit(cs, cp_coher_cntl);   /* CP_COHER_CNTL */
+               radeon_emit(cs, 0xffffffff);      /* CP_COHER_SIZE */
+               radeon_emit(cs, 0);               /* CP_COHER_BASE */
+               radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL */
+       }
+}
+
 void
 si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
                        enum chip_class chip_class,
@@ -701,13 +725,6 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
                cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
        if (flush_bits & RADV_CMD_FLAG_INV_SMEM_L1)
                cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
-       if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1)
-               cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
-       if (flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) {
-               cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
-               if (chip_class >= VI)
-                       cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1);
-       }
 
        if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
                cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
@@ -778,28 +795,29 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
                radeon_emit(cs, 0);
        }
 
+       if ((flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) ||
+           (chip_class <= CIK && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) {
+               cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
+               if (chip_class >= VI)
+                       cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1);
+       } else  if(flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2) {
+               cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1) |
+                                S_0301F0_TC_NC_ACTION_ENA(1);
+
+               /* L2 writeback doesn't combine with L1 invalidate */
+               si_emit_acquire_mem(cs, is_mec, cp_coher_cntl);
+
+               cp_coher_cntl = 0;
+       }
+
+       if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1)
+               cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
+
        /* When one of the DEST_BASE flags is set, SURFACE_SYNC waits for idle.
         * Therefore, it should be last. Done in PFP.
         */
-       if (cp_coher_cntl) {
-               if (is_mec) {
-                       radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) |
-                                                   PKT3_SHADER_TYPE_S(1));
-                       radeon_emit(cs, cp_coher_cntl);   /* CP_COHER_CNTL */
-                       radeon_emit(cs, 0xffffffff);      /* CP_COHER_SIZE */
-                       radeon_emit(cs, 0xff);            /* CP_COHER_SIZE_HI */
-                       radeon_emit(cs, 0);               /* CP_COHER_BASE */
-                       radeon_emit(cs, 0);               /* CP_COHER_BASE_HI */
-                       radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL */
-               } else {
-                       /* ACQUIRE_MEM is only required on a compute ring. */
-                       radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
-                       radeon_emit(cs, cp_coher_cntl);   /* CP_COHER_CNTL */
-                       radeon_emit(cs, 0xffffffff);      /* CP_COHER_SIZE */
-                       radeon_emit(cs, 0);               /* CP_COHER_BASE */
-                       radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL */
-               }
-       }
+       if (cp_coher_cntl)
+               si_emit_acquire_mem(cs, is_mec, cp_coher_cntl);
 }
 
 void