From edf18da85dd3b1865c4faaba650a8fa371b7103c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 29 Dec 2014 14:45:49 +0100 Subject: [PATCH] radeonsi: only flush the right set of caches for CP DMA operations MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit That's either framebuffer caches or caches for shader resources. The motivation is that framebuffer caches need to be flushed very rarely here. Reviewed-by: Michel Dänzer --- src/gallium/drivers/r600/r600_blit.c | 3 +- src/gallium/drivers/radeon/r600_pipe_common.c | 5 +- src/gallium/drivers/radeon/r600_pipe_common.h | 6 ++- src/gallium/drivers/radeon/r600_texture.c | 8 +-- src/gallium/drivers/radeon/radeon_video.c | 3 +- src/gallium/drivers/radeonsi/si_blit.c | 2 +- src/gallium/drivers/radeonsi/si_descriptors.c | 51 +++++++++++-------- src/gallium/drivers/radeonsi/si_pipe.c | 2 +- src/gallium/drivers/radeonsi/si_state.h | 2 +- 9 files changed, 48 insertions(+), 34 deletions(-) diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index bdc5f9f9bc2..01262a59e90 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -565,7 +565,8 @@ static void r600_copy_global_buffer(struct pipe_context *ctx, } static void r600_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, - unsigned offset, unsigned size, unsigned value) + unsigned offset, unsigned size, unsigned value, + bool is_framebuffer) { struct r600_context *rctx = (struct r600_context*)ctx; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index d46b3b38f86..04fc9c59c73 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -912,12 +912,13 @@ bool r600_can_dump_shader(struct r600_common_screen *rscreen, } void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst, - unsigned offset, unsigned size, unsigned value) + unsigned offset, unsigned size, unsigned value, + bool is_framebuffer) { struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context; pipe_mutex_lock(rscreen->aux_context_lock); - rctx->clear_buffer(&rctx->b, dst, offset, size, value); + rctx->clear_buffer(&rctx->b, dst, offset, size, value, is_framebuffer); rscreen->aux_context->flush(rscreen->aux_context, NULL, 0); pipe_mutex_unlock(rscreen->aux_context_lock); } diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 15736d7b1c9..a9416b686ed 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -388,7 +388,8 @@ struct r600_common_context { const struct pipe_box *src_box); void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst, - unsigned offset, unsigned size, unsigned value); + unsigned offset, unsigned size, unsigned value, + bool is_framebuffer); void (*blit_decompress_depth)(struct pipe_context *ctx, struct r600_texture *texture, @@ -441,7 +442,8 @@ void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resour bool r600_can_dump_shader(struct r600_common_screen *rscreen, const struct tgsi_token *tokens); void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst, - unsigned offset, unsigned size, unsigned value); + unsigned offset, unsigned size, unsigned value, + bool is_framebuffer); struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen, const struct pipe_resource *templ); const char *r600_get_llvm_processor_name(enum radeon_family family); diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index fdf4d763f2d..ab8ce7bd713 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -559,7 +559,8 @@ static void r600_texture_allocate_htile(struct r600_common_screen *rscreen, * without htile buffer */ R600_ERR("Failed to create buffer object for htile buffer.\n"); } else { - r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0, htile_size, 0); + r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0, + htile_size, 0, true); } } @@ -638,7 +639,8 @@ r600_texture_create_object(struct pipe_screen *screen, if (rtex->cmask.size) { /* Initialize the cmask to 0xCC (= compressed state). */ r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b, - rtex->cmask.offset, rtex->cmask.size, 0xCCCCCCCC); + rtex->cmask.offset, rtex->cmask.size, + 0xCCCCCCCC, true); } /* Initialize the CMASK base register value. */ @@ -1273,7 +1275,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, /* Do the fast clear. */ evergreen_set_clear_color(tex, fb->cbufs[i]->format, color); rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b, - tex->cmask.offset, tex->cmask.size, 0); + tex->cmask.offset, tex->cmask.size, 0, true); tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; fb_state->dirty = true; diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c index f6cfdff7aa2..14207989325 100644 --- a/src/gallium/drivers/radeon/radeon_video.c +++ b/src/gallium/drivers/radeon/radeon_video.c @@ -122,7 +122,8 @@ void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer) { struct r600_common_context *rctx = (struct r600_common_context*)context; - rctx->clear_buffer(context, &buffer->res->b.b, 0, buffer->res->buf->size, 0); + rctx->clear_buffer(context, &buffer->res->b.b, 0, buffer->res->buf->size, + 0, false); context->flush(context, NULL, 0); } diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 4744154c7e2..1f2c4082dbc 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -556,7 +556,7 @@ void si_resource_copy_region(struct pipe_context *ctx, /* Fallback for buffers. */ if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { - si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width); + si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width, false); return; } diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index d46f4e522e6..c9599617ede 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -1054,9 +1054,11 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource #define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8) static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, - unsigned offset, unsigned size, unsigned value) + unsigned offset, unsigned size, unsigned value, + bool is_framebuffer) { struct si_context *sctx = (struct si_context*)ctx; + unsigned flush_flags; if (!size) return; @@ -1081,12 +1083,15 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, uint64_t va = r600_resource(dst)->gpu_address + offset; /* Flush the caches where the resource is bound. */ - /* XXX only flush the caches where the buffer is bound. */ - sctx->b.flags |= SI_CONTEXT_INV_TC_L1 | - SI_CONTEXT_INV_TC_L2 | - SI_CONTEXT_INV_KCACHE | - SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; - sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH; + if (is_framebuffer) + flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; + else + flush_flags = SI_CONTEXT_INV_TC_L1 | + SI_CONTEXT_INV_TC_L2 | + SI_CONTEXT_INV_KCACHE; + + sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | + flush_flags; while (size) { unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT); @@ -1120,17 +1125,16 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, /* Flush the caches again in case the 3D engine has been prefetching * the resource. */ - /* XXX only flush the caches where the buffer is bound. */ - sctx->b.flags |= SI_CONTEXT_INV_TC_L1 | - SI_CONTEXT_INV_TC_L2 | - SI_CONTEXT_INV_KCACHE | - SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; + sctx->b.flags |= flush_flags; } void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src, - uint64_t dst_offset, uint64_t src_offset, unsigned size) + uint64_t dst_offset, uint64_t src_offset, unsigned size, + bool is_framebuffer) { + unsigned flush_flags; + if (!size) return; @@ -1144,11 +1148,15 @@ void si_copy_buffer(struct si_context *sctx, src_offset += r600_resource(src)->gpu_address; /* Flush the caches where the resource is bound. */ - sctx->b.flags |= SI_CONTEXT_INV_TC_L1 | - SI_CONTEXT_INV_TC_L2 | - SI_CONTEXT_INV_KCACHE | - SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER | - SI_CONTEXT_PS_PARTIAL_FLUSH; + if (is_framebuffer) + flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; + else + flush_flags = SI_CONTEXT_INV_TC_L1 | + SI_CONTEXT_INV_TC_L2 | + SI_CONTEXT_INV_KCACHE; + + sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | + flush_flags; while (size) { unsigned sync_flags = 0; @@ -1180,10 +1188,9 @@ void si_copy_buffer(struct si_context *sctx, dst_offset += byte_count; } - sctx->b.flags |= SI_CONTEXT_INV_TC_L1 | - SI_CONTEXT_INV_TC_L2 | - SI_CONTEXT_INV_KCACHE | - SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; + /* Flush the caches again in case the 3D engine has been prefetching + * the resource. */ + sctx->b.flags |= flush_flags; } /* INIT/DEINIT */ diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 4d8fd65d981..38bff31e005 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -155,7 +155,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void * /* Clear the NULL constant buffer, because loads should return zeros. */ sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0, - sctx->null_const_buf.buffer->width0, 0); + sctx->null_const_buf.buffer->width0, 0, false); } return &sctx->b.b; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 8927e50644b..3cd252c0e64 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -238,7 +238,7 @@ void si_release_all_descriptors(struct si_context *sctx); void si_all_descriptors_begin_new_cs(struct si_context *sctx); void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src, - uint64_t dst_offset, uint64_t src_offset, unsigned size); + uint64_t dst_offset, uint64_t src_offset, unsigned size, bool is_framebuffer); void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer, const uint8_t *ptr, unsigned size, uint32_t *const_offset); -- 2.30.2