From: Marek Olšák Date: Mon, 29 Dec 2014 13:45:49 +0000 (+0100) Subject: radeonsi: only flush the right set of caches for CP DMA operations X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=edf18da85dd3b1865c4faaba650a8fa371b7103c;p=mesa.git radeonsi: only flush the right set of caches for CP DMA operations That's either framebuffer caches or caches for shader resources. The motivation is that framebuffer caches need to be flushed very rarely here. Reviewed-by: Michel Dänzer --- diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index bdc5f9f9bc2..01262a59e90 100644 --- a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -565,7 +565,8 @@ static void r600_copy_global_buffer(struct pipe_context *ctx, } static void r600_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, - unsigned offset, unsigned size, unsigned value) + unsigned offset, unsigned size, unsigned value, + bool is_framebuffer) { struct r600_context *rctx = (struct r600_context*)ctx; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index d46b3b38f86..04fc9c59c73 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -912,12 +912,13 @@ bool r600_can_dump_shader(struct r600_common_screen *rscreen, } void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst, - unsigned offset, unsigned size, unsigned value) + unsigned offset, unsigned size, unsigned value, + bool is_framebuffer) { struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context; pipe_mutex_lock(rscreen->aux_context_lock); - rctx->clear_buffer(&rctx->b, dst, offset, size, value); + rctx->clear_buffer(&rctx->b, dst, offset, size, value, is_framebuffer); rscreen->aux_context->flush(rscreen->aux_context, NULL, 0); pipe_mutex_unlock(rscreen->aux_context_lock); } diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 15736d7b1c9..a9416b686ed 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -388,7 +388,8 @@ struct r600_common_context { const struct pipe_box *src_box); void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst, - unsigned offset, unsigned size, unsigned value); + unsigned offset, unsigned size, unsigned value, + bool is_framebuffer); void (*blit_decompress_depth)(struct pipe_context *ctx, struct r600_texture *texture, @@ -441,7 +442,8 @@ void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resour bool r600_can_dump_shader(struct r600_common_screen *rscreen, const struct tgsi_token *tokens); void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst, - unsigned offset, unsigned size, unsigned value); + unsigned offset, unsigned size, unsigned value, + bool is_framebuffer); struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen, const struct pipe_resource *templ); const char *r600_get_llvm_processor_name(enum radeon_family family); diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index fdf4d763f2d..ab8ce7bd713 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -559,7 +559,8 @@ static void r600_texture_allocate_htile(struct r600_common_screen *rscreen, * without htile buffer */ R600_ERR("Failed to create buffer object for htile buffer.\n"); } else { - r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0, htile_size, 0); + r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0, + htile_size, 0, true); } } @@ -638,7 +639,8 @@ r600_texture_create_object(struct pipe_screen *screen, if (rtex->cmask.size) { /* Initialize the cmask to 0xCC (= compressed state). */ r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b, - rtex->cmask.offset, rtex->cmask.size, 0xCCCCCCCC); + rtex->cmask.offset, rtex->cmask.size, + 0xCCCCCCCC, true); } /* Initialize the CMASK base register value. */ @@ -1273,7 +1275,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx, /* Do the fast clear. */ evergreen_set_clear_color(tex, fb->cbufs[i]->format, color); rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b, - tex->cmask.offset, tex->cmask.size, 0); + tex->cmask.offset, tex->cmask.size, 0, true); tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level; fb_state->dirty = true; diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c index f6cfdff7aa2..14207989325 100644 --- a/src/gallium/drivers/radeon/radeon_video.c +++ b/src/gallium/drivers/radeon/radeon_video.c @@ -122,7 +122,8 @@ void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer) { struct r600_common_context *rctx = (struct r600_common_context*)context; - rctx->clear_buffer(context, &buffer->res->b.b, 0, buffer->res->buf->size, 0); + rctx->clear_buffer(context, &buffer->res->b.b, 0, buffer->res->buf->size, + 0, false); context->flush(context, NULL, 0); } diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c index 4744154c7e2..1f2c4082dbc 100644 --- a/src/gallium/drivers/radeonsi/si_blit.c +++ b/src/gallium/drivers/radeonsi/si_blit.c @@ -556,7 +556,7 @@ void si_resource_copy_region(struct pipe_context *ctx, /* Fallback for buffers. */ if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) { - si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width); + si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width, false); return; } diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index d46f4e522e6..c9599617ede 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -1054,9 +1054,11 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource #define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8) static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, - unsigned offset, unsigned size, unsigned value) + unsigned offset, unsigned size, unsigned value, + bool is_framebuffer) { struct si_context *sctx = (struct si_context*)ctx; + unsigned flush_flags; if (!size) return; @@ -1081,12 +1083,15 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, uint64_t va = r600_resource(dst)->gpu_address + offset; /* Flush the caches where the resource is bound. */ - /* XXX only flush the caches where the buffer is bound. */ - sctx->b.flags |= SI_CONTEXT_INV_TC_L1 | - SI_CONTEXT_INV_TC_L2 | - SI_CONTEXT_INV_KCACHE | - SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; - sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH; + if (is_framebuffer) + flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; + else + flush_flags = SI_CONTEXT_INV_TC_L1 | + SI_CONTEXT_INV_TC_L2 | + SI_CONTEXT_INV_KCACHE; + + sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | + flush_flags; while (size) { unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT); @@ -1120,17 +1125,16 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, /* Flush the caches again in case the 3D engine has been prefetching * the resource. */ - /* XXX only flush the caches where the buffer is bound. */ - sctx->b.flags |= SI_CONTEXT_INV_TC_L1 | - SI_CONTEXT_INV_TC_L2 | - SI_CONTEXT_INV_KCACHE | - SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; + sctx->b.flags |= flush_flags; } void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src, - uint64_t dst_offset, uint64_t src_offset, unsigned size) + uint64_t dst_offset, uint64_t src_offset, unsigned size, + bool is_framebuffer) { + unsigned flush_flags; + if (!size) return; @@ -1144,11 +1148,15 @@ void si_copy_buffer(struct si_context *sctx, src_offset += r600_resource(src)->gpu_address; /* Flush the caches where the resource is bound. */ - sctx->b.flags |= SI_CONTEXT_INV_TC_L1 | - SI_CONTEXT_INV_TC_L2 | - SI_CONTEXT_INV_KCACHE | - SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER | - SI_CONTEXT_PS_PARTIAL_FLUSH; + if (is_framebuffer) + flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; + else + flush_flags = SI_CONTEXT_INV_TC_L1 | + SI_CONTEXT_INV_TC_L2 | + SI_CONTEXT_INV_KCACHE; + + sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | + flush_flags; while (size) { unsigned sync_flags = 0; @@ -1180,10 +1188,9 @@ void si_copy_buffer(struct si_context *sctx, dst_offset += byte_count; } - sctx->b.flags |= SI_CONTEXT_INV_TC_L1 | - SI_CONTEXT_INV_TC_L2 | - SI_CONTEXT_INV_KCACHE | - SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; + /* Flush the caches again in case the 3D engine has been prefetching + * the resource. */ + sctx->b.flags |= flush_flags; } /* INIT/DEINIT */ diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 4d8fd65d981..38bff31e005 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -155,7 +155,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void * /* Clear the NULL constant buffer, because loads should return zeros. */ sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0, - sctx->null_const_buf.buffer->width0, 0); + sctx->null_const_buf.buffer->width0, 0, false); } return &sctx->b.b; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 8927e50644b..3cd252c0e64 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -238,7 +238,7 @@ void si_release_all_descriptors(struct si_context *sctx); void si_all_descriptors_begin_new_cs(struct si_context *sctx); void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src, - uint64_t dst_offset, uint64_t src_offset, unsigned size); + uint64_t dst_offset, uint64_t src_offset, unsigned size, bool is_framebuffer); void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer, const uint8_t *ptr, unsigned size, uint32_t *const_offset);