radv: rename and re-document cache flush flags
author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tue, 25 Jun 2019 15:57:45 +0000 (17:57 +0200)
committer: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tue, 25 Jun 2019 16:38:37 +0000 (18:38 +0200)
SMEM and VMEM caches are L0 on gfx10. Ported from RadeonSI.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_meta_buffer.c
src/amd/vulkan/radv_meta_clear.c
src/amd/vulkan/radv_meta_fast_clear.c
src/amd/vulkan/radv_meta_fmask_expand.c
src/amd/vulkan/radv_meta_resolve_cs.c
src/amd/vulkan/radv_private.h
src/amd/vulkan/radv_query.c
src/amd/vulkan/si_cmd_buffer.c

index 29f2e0c8a6050bbf4c43c7d1f6c0627636e2b571..8ffd39896349d23f47755c933634059a97570c24 100644 (file)
@@ -2576,7 +2576,7 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
                case VK_ACCESS_SHADER_WRITE_BIT:
                case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
                case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
-                       flush_bits |= RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
+                       flush_bits |= RADV_CMD_FLAG_WB_L2;
                        break;
                case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
                        flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
@@ -2591,7 +2591,7 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
                case VK_ACCESS_TRANSFER_WRITE_BIT:
                        flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
                                      RADV_CMD_FLAG_FLUSH_AND_INV_DB |
-                                     RADV_CMD_FLAG_INV_GLOBAL_L2;
+                                     RADV_CMD_FLAG_INV_L2;
 
                        if (flush_CB_meta)
                                flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
@@ -2648,19 +2648,19 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
                case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
                        break;
                case VK_ACCESS_UNIFORM_READ_BIT:
-                       flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1;
+                       flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE;
                        break;
                case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
                case VK_ACCESS_TRANSFER_READ_BIT:
                case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
-                       flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 |
-                                     RADV_CMD_FLAG_INV_GLOBAL_L2;
+                       flush_bits |= RADV_CMD_FLAG_INV_VCACHE |
+                                     RADV_CMD_FLAG_INV_L2;
                        break;
                case VK_ACCESS_SHADER_READ_BIT:
-                       flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1;
+                       flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
 
                        if (!image_is_coherent)
-                               flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2;
+                               flush_bits |= RADV_CMD_FLAG_INV_L2;
                        break;
                case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
                        if (flush_CB)
@@ -3355,7 +3355,7 @@ VkResult radv_EndCommandBuffer(
 
        if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) {
                if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX6)
-                       cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
+                       cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2;
 
                /* Make sure to sync all pending active queries at the end of
                 * command buffer.
index f12b8bde1f90fd5bf6b810635d29bea22a7268e0..8d4964073cf1f76e3fa8d5df315d59560de9f9d8 100644 (file)
@@ -2704,9 +2704,9 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                                 queue->device->physical_device->rad_info.chip_class >= GFX7,
                                               (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
                                               RADV_CMD_FLAG_INV_ICACHE |
-                                              RADV_CMD_FLAG_INV_SMEM_L1 |
-                                              RADV_CMD_FLAG_INV_VMEM_L1 |
-                                              RADV_CMD_FLAG_INV_GLOBAL_L2 |
+                                              RADV_CMD_FLAG_INV_SCACHE |
+                                              RADV_CMD_FLAG_INV_VCACHE |
+                                              RADV_CMD_FLAG_INV_L2 |
                                               RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
                } else if (i == 1) {
                        si_cs_emit_cache_flush(cs,
@@ -2715,9 +2715,9 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                               queue->queue_family_index == RING_COMPUTE &&
                                                 queue->device->physical_device->rad_info.chip_class >= GFX7,
                                               RADV_CMD_FLAG_INV_ICACHE |
-                                              RADV_CMD_FLAG_INV_SMEM_L1 |
-                                              RADV_CMD_FLAG_INV_VMEM_L1 |
-                                              RADV_CMD_FLAG_INV_GLOBAL_L2 |
+                                              RADV_CMD_FLAG_INV_SCACHE |
+                                              RADV_CMD_FLAG_INV_VCACHE |
+                                              RADV_CMD_FLAG_INV_L2 |
                                               RADV_CMD_FLAG_START_PIPELINE_STATS, 0);
                }
 
index c19bf0da1c30b79ea43c2d00524a72634e5360a9..c457ac4e5f2754e3598d158c2492f0db5224dcbc 100644 (file)
@@ -415,8 +415,8 @@ uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
        if (size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
                fill_buffer_shader(cmd_buffer, bo, offset, size, value);
                flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
-                            RADV_CMD_FLAG_INV_VMEM_L1 |
-                            RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
+                            RADV_CMD_FLAG_INV_VCACHE |
+                            RADV_CMD_FLAG_WB_L2;
        } else if (size) {
                uint64_t va = radv_buffer_get_va(bo);
                va += offset;
index 4d569729dda1ed27e0c8d18f21e8884793fc9fcc..091b73841f80be9c86d1ed435ce372630626fc7a 100644 (file)
@@ -870,8 +870,8 @@ clear_htile_mask(struct radv_cmd_buffer *cmd_buffer,
        radv_meta_restore(&saved_state, cmd_buffer);
 
        return RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
-              RADV_CMD_FLAG_INV_VMEM_L1 |
-              RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
+              RADV_CMD_FLAG_INV_VCACHE |
+              RADV_CMD_FLAG_WB_L2;
 }
 
 static uint32_t
index 71cf90c611ecb2e0ad49b4ec9f5ed7e0ce006205..f18f7637593e7bfc59b08d87836f236df9e5c1d1 100644 (file)
@@ -873,7 +873,7 @@ radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer,
        radv_meta_restore(&saved_state, cmd_buffer);
 
        state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
-                            RADV_CMD_FLAG_INV_VMEM_L1;
+                            RADV_CMD_FLAG_INV_VCACHE;
 
 
        /* Initialize the DCC metadata as "fully expanded". */
index a8f5e0cc4c191245a943fba5bd89ffa9ba81d075..c4cec58235faf6b368acb55c20d6a0d8a8ba218a 100644 (file)
@@ -169,7 +169,7 @@ radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer,
        radv_meta_restore(&saved_state, cmd_buffer);
 
        cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
-                                       RADV_CMD_FLAG_INV_GLOBAL_L2;
+                                       RADV_CMD_FLAG_INV_L2;
 
        /* Re-initialize FMASK in fully expanded mode. */
        radv_initialize_fmask(cmd_buffer, image, subresourceRange);
index c06f0f2c5ce6ef10a857a4ccab9cd2ce25db1cf4..7d3cc166e0d79496dda507e62e195a48fd66b60d 100644 (file)
@@ -952,7 +952,7 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
        }
 
        cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
-                                       RADV_CMD_FLAG_INV_VMEM_L1;
+                                       RADV_CMD_FLAG_INV_VCACHE;
 }
 
 void
@@ -1037,7 +1037,7 @@ radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
        }
 
        cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
-                                       RADV_CMD_FLAG_INV_VMEM_L1;
+                                       RADV_CMD_FLAG_INV_VCACHE;
 
        if (radv_image_has_htile(dst_image)) {
                if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
index b537778001c7c72753d5a3ff3483485ce3df506d..0c842a4d1b77d926165d8d7e3555fe27351b2300 100644 (file)
@@ -914,29 +914,33 @@ enum radv_cmd_dirty_bits {
 };
 
 enum radv_cmd_flush_bits {
-       RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
-       /* SMEM L1, other names: KCACHE, constant cache, DCACHE, data cache */
-       RADV_CMD_FLAG_INV_SMEM_L1 = 1 << 1,
-       /* VMEM L1 can optionally be bypassed (GLC=1). Other names: TC L1 */
-       RADV_CMD_FLAG_INV_VMEM_L1 = 1 << 2,
-       /* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */
-       RADV_CMD_FLAG_INV_GLOBAL_L2 = 1 << 3,
-       /* Same as above, but only writes back and doesn't invalidate */
-       RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 = 1 << 4,
+       /* Instruction cache. */
+       RADV_CMD_FLAG_INV_ICACHE                         = 1 << 0,
+       /* Scalar L1 cache. */
+       RADV_CMD_FLAG_INV_SCACHE                         = 1 << 1,
+       /* Vector L1 cache. */
+       RADV_CMD_FLAG_INV_VCACHE                         = 1 << 2,
+       /* L2 cache + L2 metadata cache writeback & invalidate.
+        * GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
+       RADV_CMD_FLAG_INV_L2                             = 1 << 3,
+       /* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
+        * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
+        * GFX6-7 will do complete invalidation, because the writeback is unsupported. */
+       RADV_CMD_FLAG_WB_L2                              = 1 << 4,
        /* Framebuffer caches */
-       RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5,
-       RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6,
-       RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7,
-       RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8,
+       RADV_CMD_FLAG_FLUSH_AND_INV_CB_META              = 1 << 5,
+       RADV_CMD_FLAG_FLUSH_AND_INV_DB_META              = 1 << 6,
+       RADV_CMD_FLAG_FLUSH_AND_INV_DB                   = 1 << 7,
+       RADV_CMD_FLAG_FLUSH_AND_INV_CB                   = 1 << 8,
        /* Engine synchronization. */
-       RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9,
-       RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10,
-       RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11,
-       RADV_CMD_FLAG_VGT_FLUSH        = 1 << 12,
+       RADV_CMD_FLAG_VS_PARTIAL_FLUSH                   = 1 << 9,
+       RADV_CMD_FLAG_PS_PARTIAL_FLUSH                   = 1 << 10,
+       RADV_CMD_FLAG_CS_PARTIAL_FLUSH                   = 1 << 11,
+       RADV_CMD_FLAG_VGT_FLUSH                          = 1 << 12,
        /* Pipeline query controls. */
-       RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 13,
-       RADV_CMD_FLAG_STOP_PIPELINE_STATS  = 1 << 14,
-       RADV_CMD_FLAG_VGT_STREAMOUT_SYNC   = 1 << 15,
+       RADV_CMD_FLAG_START_PIPELINE_STATS               = 1 << 13,
+       RADV_CMD_FLAG_STOP_PIPELINE_STATS                = 1 << 14,
+       RADV_CMD_FLAG_VGT_STREAMOUT_SYNC                 = 1 << 15,
 
        RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
                                              RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
index bec7b23af058587f8131bb6c03972edc38faeccc..82741c21bf71b1fd137fcd494b682ab62253552f 100644 (file)
@@ -1012,8 +1012,8 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
                                      VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
                                      &push_constants);
 
-       cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2 |
-                                       RADV_CMD_FLAG_INV_VMEM_L1;
+       cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2 |
+                                       RADV_CMD_FLAG_INV_VCACHE;
 
        if (flags & VK_QUERY_RESULT_WAIT_BIT)
                cmd_buffer->state.flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER;
@@ -1639,8 +1639,8 @@ static void emit_end_query(struct radv_cmd_buffer *cmd_buffer,
 
        cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
                                               RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
-                                              RADV_CMD_FLAG_INV_GLOBAL_L2 |
-                                              RADV_CMD_FLAG_INV_VMEM_L1;
+                                              RADV_CMD_FLAG_INV_L2 |
+                                              RADV_CMD_FLAG_INV_VCACHE;
        if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
                cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
                                                       RADV_CMD_FLAG_FLUSH_AND_INV_DB;
index 126cabd390ae281d6bf46cc89a874597272f27d7..52cb7477c08dbf4b0270b4b968e3b5345ff87c53 100644 (file)
@@ -781,7 +781,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
        
        if (flush_bits & RADV_CMD_FLAG_INV_ICACHE)
                cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
-       if (flush_bits & RADV_CMD_FLAG_INV_SMEM_L1)
+       if (flush_bits & RADV_CMD_FLAG_INV_SCACHE)
                cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
 
        if (chip_class <= GFX8) {
@@ -859,16 +859,16 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
                           EVENT_TC_MD_ACTION_ENA;
 
                /* Ideally flush TC together with CB/DB. */
-               if (flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) {
+               if (flush_bits & RADV_CMD_FLAG_INV_L2) {
                        /* Writeback and invalidate everything in L2 & L1. */
                        tc_flags = EVENT_TC_ACTION_ENA |
                                   EVENT_TC_WB_ACTION_ENA;
 
 
                        /* Clear the flags. */
-                       flush_bits &= ~(RADV_CMD_FLAG_INV_GLOBAL_L2 |
-                                        RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 |
-                                        RADV_CMD_FLAG_INV_VMEM_L1);
+                       flush_bits &= ~(RADV_CMD_FLAG_INV_L2 |
+                                        RADV_CMD_FLAG_WB_L2 |
+                                        RADV_CMD_FLAG_INV_VCACHE);
                }
                assert(flush_cnt);
                (*flush_cnt)++;
@@ -898,16 +898,16 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
         */
        if ((cp_coher_cntl ||
             (flush_bits & (RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
-                           RADV_CMD_FLAG_INV_VMEM_L1 |
-                           RADV_CMD_FLAG_INV_GLOBAL_L2 |
-                           RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) &&
+                           RADV_CMD_FLAG_INV_VCACHE |
+                           RADV_CMD_FLAG_INV_L2 |
+                           RADV_CMD_FLAG_WB_L2))) &&
            !is_mec) {
                radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
                radeon_emit(cs, 0);
        }
 
-       if ((flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) ||
-           (chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) {
+       if ((flush_bits & RADV_CMD_FLAG_INV_L2) ||
+           (chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) {
                si_emit_acquire_mem(cs, is_mec, chip_class >= GFX9,
                                    cp_coher_cntl |
                                    S_0085F0_TC_ACTION_ENA(1) |
@@ -915,7 +915,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
                                    S_0301F0_TC_WB_ACTION_ENA(chip_class >= GFX8));
                cp_coher_cntl = 0;
        } else {
-               if(flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2) {
+               if(flush_bits & RADV_CMD_FLAG_WB_L2) {
                        /* WB = write-back
                         * NC = apply to non-coherent MTYPEs
                         *      (i.e. MTYPE <= 1, which is what we use everywhere)
@@ -929,7 +929,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
                                            S_0301F0_TC_NC_ACTION_ENA(1));
                        cp_coher_cntl = 0;
                }
-               if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1) {
+               if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
                        si_emit_acquire_mem(cs, is_mec,
                                            chip_class >= GFX9,
                                            cp_coher_cntl |