radeonsi: only flush the right set of caches for CP DMA operations
author Marek Olšák <marek.olsak@amd.com>
Mon, 29 Dec 2014 13:45:49 +0000 (14:45 +0100)
committer Marek Olšák <marek.olsak@amd.com>
Wed, 7 Jan 2015 11:06:43 +0000 (12:06 +0100)
The right set is either the framebuffer caches or the caches for shader
resources. The motivation is that the framebuffer caches very rarely need to
be flushed here.

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
src/gallium/drivers/r600/r600_blit.c
src/gallium/drivers/radeon/r600_pipe_common.c
src/gallium/drivers/radeon/r600_pipe_common.h
src/gallium/drivers/radeon/r600_texture.c
src/gallium/drivers/radeon/radeon_video.c
src/gallium/drivers/radeonsi/si_blit.c
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_state.h

index bdc5f9f9bc219b13dffb0cfe6653d3bc0dc0dad2..01262a59e906d745bf85c54443cb5c3dd29b0cb0 100644 (file)
@@ -565,7 +565,8 @@ static void r600_copy_global_buffer(struct pipe_context *ctx,
 }
 
 static void r600_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
-                             unsigned offset, unsigned size, unsigned value)
+                             unsigned offset, unsigned size, unsigned value,
+                             bool is_framebuffer)
 {
        struct r600_context *rctx = (struct r600_context*)ctx;
 
index d46b3b38f8685a5638436ca6b1f54fa816f30b66..04fc9c59c73211c80f49d697d74782bb9f57ba33 100644 (file)
@@ -912,12 +912,13 @@ bool r600_can_dump_shader(struct r600_common_screen *rscreen,
 }
 
 void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
-                             unsigned offset, unsigned size, unsigned value)
+                             unsigned offset, unsigned size, unsigned value,
+                             bool is_framebuffer)
 {
        struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
 
        pipe_mutex_lock(rscreen->aux_context_lock);
-       rctx->clear_buffer(&rctx->b, dst, offset, size, value);
+       rctx->clear_buffer(&rctx->b, dst, offset, size, value, is_framebuffer);
        rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
        pipe_mutex_unlock(rscreen->aux_context_lock);
 }
index 15736d7b1c9f627656aa2f69fa073685c6aeacc3..a9416b686ed8f68a80ec08ca2f44c9490ba94794 100644 (file)
@@ -388,7 +388,8 @@ struct r600_common_context {
                         const struct pipe_box *src_box);
 
        void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
-                            unsigned offset, unsigned size, unsigned value);
+                            unsigned offset, unsigned size, unsigned value,
+                            bool is_framebuffer);
 
        void (*blit_decompress_depth)(struct pipe_context *ctx,
                                      struct r600_texture *texture,
@@ -441,7 +442,8 @@ void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resour
 bool r600_can_dump_shader(struct r600_common_screen *rscreen,
                          const struct tgsi_token *tokens);
 void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
-                             unsigned offset, unsigned size, unsigned value);
+                             unsigned offset, unsigned size, unsigned value,
+                             bool is_framebuffer);
 struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
                                                  const struct pipe_resource *templ);
 const char *r600_get_llvm_processor_name(enum radeon_family family);
index fdf4d763f2d9b1f23116c8d397cbe5b336a2bfe3..ab8ce7bd7136d9be8d0f6cd2173dc4c3b6c70ce5 100644 (file)
@@ -559,7 +559,8 @@ static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
                 * without htile buffer */
                R600_ERR("Failed to create buffer object for htile buffer.\n");
        } else {
-               r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0, htile_size, 0);
+               r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0,
+                                        htile_size, 0, true);
        }
 }
 
@@ -638,7 +639,8 @@ r600_texture_create_object(struct pipe_screen *screen,
        if (rtex->cmask.size) {
                /* Initialize the cmask to 0xCC (= compressed state). */
                r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
-                                        rtex->cmask.offset, rtex->cmask.size, 0xCCCCCCCC);
+                                        rtex->cmask.offset, rtex->cmask.size,
+                                        0xCCCCCCCC, true);
        }
 
        /* Initialize the CMASK base register value. */
@@ -1273,7 +1275,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
                /* Do the fast clear. */
                evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
                rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
-                                  tex->cmask.offset, tex->cmask.size, 0);
+                                  tex->cmask.offset, tex->cmask.size, 0, true);
 
                tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
                fb_state->dirty = true;
index f6cfdff7aa2505e846edba692b009937aa252d9e..14207989325c7cf67974b365e7d968f7b010e7dd 100644 (file)
@@ -122,7 +122,8 @@ void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
 {
        struct r600_common_context *rctx = (struct r600_common_context*)context;
 
-       rctx->clear_buffer(context, &buffer->res->b.b, 0, buffer->res->buf->size, 0);
+       rctx->clear_buffer(context, &buffer->res->b.b, 0, buffer->res->buf->size,
+                          0, false);
        context->flush(context, NULL, 0);
 }
 
index 4744154c7e21763dee4d29c057a77dda3ed7c183..1f2c4082dbcecdc217ad4410b4e9d5e028f4f3e6 100644 (file)
@@ -556,7 +556,7 @@ void si_resource_copy_region(struct pipe_context *ctx,
 
        /* Fallback for buffers. */
        if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
-               si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
+               si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width, false);
                return;
        }
 
index d46f4e522e60c956ca32432ec946d27dcc6e71d5..c9599617ede7bfbc49ff259882c4c7ce8dffa9c5 100644 (file)
@@ -1054,9 +1054,11 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
 #define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
 
 static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
-                           unsigned offset, unsigned size, unsigned value)
+                           unsigned offset, unsigned size, unsigned value,
+                           bool is_framebuffer)
 {
        struct si_context *sctx = (struct si_context*)ctx;
+       unsigned flush_flags;
 
        if (!size)
                return;
@@ -1081,12 +1083,15 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
        uint64_t va = r600_resource(dst)->gpu_address + offset;
 
        /* Flush the caches where the resource is bound. */
-       /* XXX only flush the caches where the buffer is bound. */
-       sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
-                        SI_CONTEXT_INV_TC_L2 |
-                        SI_CONTEXT_INV_KCACHE |
-                        SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
-       sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
+       if (is_framebuffer)
+               flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+       else
+               flush_flags = SI_CONTEXT_INV_TC_L1 |
+                             SI_CONTEXT_INV_TC_L2 |
+                             SI_CONTEXT_INV_KCACHE;
+
+       sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+                        flush_flags;
 
        while (size) {
                unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
@@ -1120,17 +1125,16 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
 
        /* Flush the caches again in case the 3D engine has been prefetching
         * the resource. */
-       /* XXX only flush the caches where the buffer is bound. */
-       sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
-                        SI_CONTEXT_INV_TC_L2 |
-                        SI_CONTEXT_INV_KCACHE |
-                        SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+       sctx->b.flags |= flush_flags;
 }
 
 void si_copy_buffer(struct si_context *sctx,
                    struct pipe_resource *dst, struct pipe_resource *src,
-                   uint64_t dst_offset, uint64_t src_offset, unsigned size)
+                   uint64_t dst_offset, uint64_t src_offset, unsigned size,
+                   bool is_framebuffer)
 {
+       unsigned flush_flags;
+
        if (!size)
                return;
 
@@ -1144,11 +1148,15 @@ void si_copy_buffer(struct si_context *sctx,
        src_offset += r600_resource(src)->gpu_address;
 
        /* Flush the caches where the resource is bound. */
-       sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
-                        SI_CONTEXT_INV_TC_L2 |
-                        SI_CONTEXT_INV_KCACHE |
-                        SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
-                        SI_CONTEXT_PS_PARTIAL_FLUSH;
+       if (is_framebuffer)
+               flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+       else
+               flush_flags = SI_CONTEXT_INV_TC_L1 |
+                             SI_CONTEXT_INV_TC_L2 |
+                             SI_CONTEXT_INV_KCACHE;
+
+       sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+                        flush_flags;
 
        while (size) {
                unsigned sync_flags = 0;
@@ -1180,10 +1188,9 @@ void si_copy_buffer(struct si_context *sctx,
                dst_offset += byte_count;
        }
 
-       sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
-                        SI_CONTEXT_INV_TC_L2 |
-                        SI_CONTEXT_INV_KCACHE |
-                        SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+       /* Flush the caches again in case the 3D engine has been prefetching
+        * the resource. */
+       sctx->b.flags |= flush_flags;
 }
 
 /* INIT/DEINIT */
index 4d8fd65d981570437fb0eb66f362a0bd07dfeb54..38bff31e0056d8bf70d7bdbbdfd8de12ca729770 100644 (file)
@@ -155,7 +155,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
 
                /* Clear the NULL constant buffer, because loads should return zeros. */
                sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
-                                    sctx->null_const_buf.buffer->width0, 0);
+                                    sctx->null_const_buf.buffer->width0, 0, false);
        }
 
        return &sctx->b.b;
index 8927e50644b114e72a9c554fc2b35675ecb9c277..3cd252c0e6420443b31453dd75ba0805a6014dc8 100644 (file)
@@ -238,7 +238,7 @@ void si_release_all_descriptors(struct si_context *sctx);
 void si_all_descriptors_begin_new_cs(struct si_context *sctx);
 void si_copy_buffer(struct si_context *sctx,
                    struct pipe_resource *dst, struct pipe_resource *src,
-                   uint64_t dst_offset, uint64_t src_offset, unsigned size);
+                   uint64_t dst_offset, uint64_t src_offset, unsigned size, bool is_framebuffer);
 void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
                            const uint8_t *ptr, unsigned size, uint32_t *const_offset);