radeonsi: rework clear_buffer flags
author Marek Olšák <marek.olsak@amd.com>
Fri, 22 Apr 2016 08:26:28 +0000 (10:26 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Thu, 28 Apr 2016 18:16:56 +0000 (20:16 +0200)
Changes:
- don't flush DB for fast color clears
- don't flush any caches for initial clears
- remove the flag from si_copy_buffer, always assume shader coherency

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/r600/r600_blit.c
src/gallium/drivers/radeon/r600_pipe_common.c
src/gallium/drivers/radeon/r600_pipe_common.h
src/gallium/drivers/radeon/r600_texture.c
src/gallium/drivers/radeon/radeon_video.c
src/gallium/drivers/radeonsi/si_blit.c
src/gallium/drivers/radeonsi/si_cp_dma.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h

index 2d30807c9f49a851a4af45fb7289088844466927..ed67cb8699fbdf0602387b715ca54878b14d1a1a 100644 (file)
@@ -582,7 +582,7 @@ static void r600_copy_global_buffer(struct pipe_context *ctx,
 
 static void r600_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
                              uint64_t offset, uint64_t size, unsigned value,
-                             bool is_framebuffer)
+                             enum r600_coherency coher)
 {
        struct r600_context *rctx = (struct r600_context*)ctx;
 
index 929fecb928472b438fc7dc8060d8d24bb4500f37..823ba46e8c9bd94af4d94d3f4cd9184795ef0bfb 100644 (file)
@@ -984,12 +984,12 @@ bool r600_can_dump_shader(struct r600_common_screen *rscreen,
 
 void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
                              uint64_t offset, uint64_t size, unsigned value,
-                             bool is_framebuffer)
+                             enum r600_coherency coher)
 {
        struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
 
        pipe_mutex_lock(rscreen->aux_context_lock);
-       rctx->clear_buffer(&rctx->b, dst, offset, size, value, is_framebuffer);
+       rctx->clear_buffer(&rctx->b, dst, offset, size, value, coher);
        rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
        pipe_mutex_unlock(rscreen->aux_context_lock);
 }
index d7478efa4bc3ff54cb356cd0fa4533f9bde632f2..74eefbb8fc51aad9f840f506d79cb002ccf1047b 100644 (file)
 #define R600_MAP_BUFFER_ALIGNMENT 64
 #define R600_MAX_VIEWPORTS        16
 
+enum r600_coherency {
+       R600_COHERENCY_NONE, /* no cache flushes needed */
+       R600_COHERENCY_SHADER,
+       R600_COHERENCY_CB_META,
+};
+
 #ifdef PIPE_ARCH_BIG_ENDIAN
 #define R600_BIG_ENDIAN 1
 #else
@@ -513,7 +519,7 @@ struct r600_common_context {
 
        void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
                             uint64_t offset, uint64_t size, unsigned value,
-                            bool is_framebuffer);
+                            enum r600_coherency coher);
 
        void (*blit_decompress_depth)(struct pipe_context *ctx,
                                      struct r600_texture *texture,
@@ -584,7 +590,7 @@ bool r600_can_dump_shader(struct r600_common_screen *rscreen,
                          unsigned processor);
 void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
                              uint64_t offset, uint64_t size, unsigned value,
-                             bool is_framebuffer);
+                             enum r600_coherency coher);
 struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
                                                  const struct pipe_resource *templ);
 const char *r600_get_llvm_processor_name(enum radeon_family family);
index 7e58490e66323b0c1b490dfb0e806af55c7e94aa..41bc48a5f7e626c24c951d9aa08657f294dbaf99 100644 (file)
@@ -717,7 +717,7 @@ static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
                R600_ERR("Failed to create buffer object for htile buffer.\n");
        } else {
                r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0,
-                                        htile_size, 0, true);
+                                        htile_size, 0, R600_COHERENCY_NONE);
        }
 }
 
@@ -892,13 +892,13 @@ r600_texture_create_object(struct pipe_screen *screen,
                /* Initialize the cmask to 0xCC (= compressed state). */
                r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
                                         rtex->cmask.offset, rtex->cmask.size,
-                                        0xCCCCCCCC, true);
+                                        0xCCCCCCCC, R600_COHERENCY_NONE);
        }
        if (rtex->dcc_offset) {
                r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
                                         rtex->dcc_offset,
                                         rtex->surface.dcc_size,
-                                        0xFFFFFFFF, true);
+                                        0xFFFFFFFF, R600_COHERENCY_NONE);
        }
 
        /* Initialize the CMASK base register value. */
@@ -1623,7 +1623,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
 
                        rctx->clear_buffer(&rctx->b, &tex->resource.b.b,
                                           tex->dcc_offset, tex->surface.dcc_size,
-                                          reset_value, true);
+                                          reset_value, R600_COHERENCY_CB_META);
 
                        if (clear_words_needed)
                                tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
@@ -1640,7 +1640,8 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
 
                        /* Do the fast clear. */
                        rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
-                                       tex->cmask.offset, tex->cmask.size, 0, true);
+                                          tex->cmask.offset, tex->cmask.size, 0,
+                                          R600_COHERENCY_CB_META);
 
                        tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
                }
index e2ff037fbb32ee807ecd2674d7a69d0e90753c98..acbf79005f4d7debf15595ed7c75ebe964698240 100644 (file)
@@ -122,7 +122,7 @@ void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
        struct r600_common_context *rctx = (struct r600_common_context*)context;
 
        rctx->clear_buffer(context, &buffer->res->b.b, 0, buffer->res->buf->size,
-                          0, false);
+                          0, R600_COHERENCY_NONE);
        context->flush(context, NULL, 0);
 }
 
index 6fa5b975d9d7e01e11133eeaf0db0988f537a1b1..0233e10c9757fea2e7e2e08553569d2d708cc597 100644 (file)
@@ -630,7 +630,7 @@ void si_resource_copy_region(struct pipe_context *ctx,
 
        /* Handle buffers first. */
        if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
-               si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width, false);
+               si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
                return;
        }
 
@@ -949,7 +949,8 @@ static void si_pipe_clear_buffer(struct pipe_context *ctx,
                dword_value = *(uint32_t*)clear_value_ptr;
        }
 
-       sctx->b.clear_buffer(ctx, dst, offset, size, dword_value, false);
+       sctx->b.clear_buffer(ctx, dst, offset, size, dword_value,
+                            R600_COHERENCY_SHADER);
 }
 
 void si_init_blit_functions(struct si_context *sctx)
index bca9cc5020eebb48ba53006fcb7de25d00dfdf78..cbb84b00ce42787726701b1b6822e366d8ab3559 100644 (file)
@@ -107,19 +107,26 @@ static void si_emit_cp_dma_clear_buffer(struct si_context *sctx,
        }
 }
 
-static unsigned get_flush_flags(struct si_context *sctx, bool is_framebuffer)
+static unsigned get_flush_flags(struct si_context *sctx, enum r600_coherency coher)
 {
-       if (is_framebuffer)
-               return SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
-
-       return SI_CONTEXT_INV_SMEM_L1 |
-              SI_CONTEXT_INV_VMEM_L1 |
-              (sctx->b.chip_class == SI ? SI_CONTEXT_INV_GLOBAL_L2 : 0);
+       switch (coher) {
+       default:
+       case R600_COHERENCY_NONE:
+               return 0;
+       case R600_COHERENCY_SHADER:
+               return SI_CONTEXT_INV_SMEM_L1 |
+                      SI_CONTEXT_INV_VMEM_L1 |
+                      (sctx->b.chip_class == SI ? SI_CONTEXT_INV_GLOBAL_L2 : 0);
+       case R600_COHERENCY_CB_META:
+               return SI_CONTEXT_FLUSH_AND_INV_CB |
+                      SI_CONTEXT_FLUSH_AND_INV_CB_META;
+       }
 }
 
-static unsigned get_tc_l2_flag(struct si_context *sctx, bool is_framebuffer)
+static unsigned get_tc_l2_flag(struct si_context *sctx, enum r600_coherency coher)
 {
-       return is_framebuffer || sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2;
+       return coher == R600_COHERENCY_SHADER &&
+              sctx->b.chip_class >= CIK ? CIK_CP_DMA_USE_L2 : 0;
 }
 
 static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst,
@@ -159,11 +166,11 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst
 
 static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
                            uint64_t offset, uint64_t size, unsigned value,
-                           bool is_framebuffer)
+                           enum r600_coherency coher)
 {
        struct si_context *sctx = (struct si_context*)ctx;
-       unsigned tc_l2_flag = get_tc_l2_flag(sctx, is_framebuffer);
-       unsigned flush_flags = get_flush_flags(sctx, is_framebuffer);
+       unsigned tc_l2_flag = get_tc_l2_flag(sctx, coher);
+       unsigned flush_flags = get_flush_flags(sctx, coher);
 
        if (!size)
                return;
@@ -249,14 +256,13 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size)
 
 void si_copy_buffer(struct si_context *sctx,
                    struct pipe_resource *dst, struct pipe_resource *src,
-                   uint64_t dst_offset, uint64_t src_offset, unsigned size,
-                   bool is_framebuffer)
+                   uint64_t dst_offset, uint64_t src_offset, unsigned size)
 {
        uint64_t main_dst_offset, main_src_offset;
        unsigned skipped_size = 0;
        unsigned realign_size = 0;
-       unsigned tc_l2_flag = get_tc_l2_flag(sctx, is_framebuffer);
-       unsigned flush_flags = get_flush_flags(sctx, is_framebuffer);
+       unsigned tc_l2_flag = get_tc_l2_flag(sctx, R600_COHERENCY_SHADER);
+       unsigned flush_flags = get_flush_flags(sctx, R600_COHERENCY_SHADER);
 
        if (!size)
                return;
index ab6ea40ac9314debb1f3affa31eea06cb088af47..61d55781161833f0a9b38d447b2ed82e53b54c89 100644 (file)
@@ -224,7 +224,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 
                /* Clear the NULL constant buffer, because loads should return zeros. */
                sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
-                                    sctx->null_const_buf.buffer->width0, 0, false);
+                                    sctx->null_const_buf.buffer->width0, 0,
+                                    R600_COHERENCY_SHADER);
        }
 
        /* XXX: This is the maximum value allowed.  I'm not sure how to compute
index 13946a5ad71553c86a561caad22ac9eaefde6071..d31e9a93ef93025de31d7265db7660c6316d29eb 100644 (file)
@@ -348,8 +348,7 @@ void si_resource_copy_region(struct pipe_context *ctx,
 /* si_cp_dma.c */
 void si_copy_buffer(struct si_context *sctx,
                    struct pipe_resource *dst, struct pipe_resource *src,
-                   uint64_t dst_offset, uint64_t src_offset, unsigned size,
-                   bool is_framebuffer);
+                   uint64_t dst_offset, uint64_t src_offset, unsigned size);
 void si_init_cp_dma_functions(struct si_context *sctx);
 
 /* si_debug.c */