From: Marek Olšák Date: Sun, 30 Jun 2013 17:57:59 +0000 (+0200) Subject: r600g: only flush the caches that need to be flushed during CP DMA operations X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=7948ed1250cae78ae1b22dbce4ab23aceacc6159;p=mesa.git r600g: only flush the caches that need to be flushed during CP DMA operations This should increase performance if constant uploads are done with the CP DMA, because only the cache that needs to be flushed is flushed. Reviewed-by: Alex Deucher --- diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index 075ab17a557..c428bc1f6cc 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -121,18 +121,8 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx, offset += r600_resource_va(&rctx->screen->screen, dst); - /* We flush the caches, because we might read from or write - * to resources which are bound right now. */ - rctx->flags |= R600_CONTEXT_INV_CONST_CACHE | - R600_CONTEXT_INV_VERTEX_CACHE | - R600_CONTEXT_INV_TEX_CACHE | - R600_CONTEXT_FLUSH_AND_INV | - R600_CONTEXT_FLUSH_AND_INV_CB | - R600_CONTEXT_FLUSH_AND_INV_DB | - R600_CONTEXT_FLUSH_AND_INV_CB_META | - R600_CONTEXT_FLUSH_AND_INV_DB_META | - R600_CONTEXT_STREAMOUT_FLUSH | - R600_CONTEXT_WAIT_3D_IDLE; + /* Flush the cache where the resource is bound. */ + r600_flag_resource_cache_flush(rctx, dst); while (size) { unsigned sync = 0; @@ -169,10 +159,9 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx, offset += byte_count; } - /* Invalidate the read caches. */ - rctx->flags |= R600_CONTEXT_INV_CONST_CACHE | - R600_CONTEXT_INV_VERTEX_CACHE | - R600_CONTEXT_INV_TEX_CACHE; + /* Flush the cache again in case the 3D engine has been prefetching + * the resource. */ + r600_flag_resource_cache_flush(rctx, dst); util_range_add(&r600_resource(dst)->valid_buffer_range, offset, offset + size); diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index ff36573ab5e..e2444cc5cdc 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -626,18 +626,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, dst_offset += r600_resource_va(&rctx->screen->screen, dst); src_offset += r600_resource_va(&rctx->screen->screen, src); - /* We flush the caches, because we might read from or write - * to resources which are bound right now. */ - rctx->flags |= R600_CONTEXT_INV_CONST_CACHE | - R600_CONTEXT_INV_VERTEX_CACHE | - R600_CONTEXT_INV_TEX_CACHE | - R600_CONTEXT_FLUSH_AND_INV | - R600_CONTEXT_FLUSH_AND_INV_CB | - R600_CONTEXT_FLUSH_AND_INV_DB | - R600_CONTEXT_FLUSH_AND_INV_CB_META | - R600_CONTEXT_FLUSH_AND_INV_DB_META | - R600_CONTEXT_STREAMOUT_FLUSH | - R600_CONTEXT_WAIT_3D_IDLE; + /* Flush the caches where the resources are bound. */ + r600_flag_resource_cache_flush(rctx, src); + r600_flag_resource_cache_flush(rctx, dst); /* There are differences between R700 and EG in CP DMA, * but we only use the common bits here. */ @@ -679,10 +670,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, dst_offset += byte_count; } - /* Invalidate the read caches. */ - rctx->flags |= R600_CONTEXT_INV_CONST_CACHE | - R600_CONTEXT_INV_VERTEX_CACHE | - R600_CONTEXT_INV_TEX_CACHE; + /* Flush the cache of the dst resource again in case the 3D engine + * has been prefetching it. */ + r600_flag_resource_cache_flush(rctx, dst); util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset, dst_offset + size); @@ -736,3 +726,107 @@ void r600_dma_copy(struct r600_context *rctx, util_range_add(&rdst->valid_buffer_range, dst_offset, dst_offset + size); } + +/* Flag the cache of the resource for it to be flushed later if the resource + * is bound. Otherwise do nothing. Used for synchronization between engines. + */ +void r600_flag_resource_cache_flush(struct r600_context *rctx, + struct pipe_resource *res) +{ + /* Check vertex buffers. */ + uint32_t mask = rctx->vertex_buffer_state.enabled_mask; + while (mask) { + uint32_t i = u_bit_scan(&mask); + if (rctx->vertex_buffer_state.vb[i].buffer == res) { + rctx->flags |= R600_CONTEXT_INV_VERTEX_CACHE; + } + } + + /* Check vertex buffers for compute. */ + mask = rctx->cs_vertex_buffer_state.enabled_mask; + while (mask) { + uint32_t i = u_bit_scan(&mask); + if (rctx->cs_vertex_buffer_state.vb[i].buffer == res) { + rctx->flags |= R600_CONTEXT_INV_VERTEX_CACHE; + } + } + + /* Check constant buffers. */ + unsigned shader; + for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) { + struct r600_constbuf_state *state = &rctx->constbuf_state[shader]; + uint32_t mask = state->enabled_mask; + + while (mask) { + unsigned i = u_bit_scan(&mask); + if (state->cb[i].buffer == res) { + rctx->flags |= R600_CONTEXT_INV_CONST_CACHE; + + shader = PIPE_SHADER_TYPES; /* break the outer loop */ + break; + } + } + } + + /* Check textures. */ + for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) { + struct r600_samplerview_state *state = &rctx->samplers[shader].views; + uint32_t mask = state->enabled_mask; + + while (mask) { + uint32_t i = u_bit_scan(&mask); + if (&state->views[i]->tex_resource->b.b == res) { + rctx->flags |= R600_CONTEXT_INV_TEX_CACHE; + + shader = PIPE_SHADER_TYPES; /* break the outer loop */ + break; + } + } + } + + /* Check streamout buffers. */ + int i; + for (i = 0; i < rctx->streamout.num_targets; i++) { + if (rctx->streamout.targets[i]->b.buffer == res) { + rctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH | + R600_CONTEXT_FLUSH_AND_INV | + R600_CONTEXT_WAIT_3D_IDLE; + break; + } + } + + /* Check colorbuffers. */ + for (i = 0; i < rctx->framebuffer.state.nr_cbufs; i++) { + if (rctx->framebuffer.state.cbufs[i] && + rctx->framebuffer.state.cbufs[i]->texture == res) { + struct r600_texture *tex = + (struct r600_texture*)rctx->framebuffer.state.cbufs[i]->texture; + + rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_CB | + R600_CONTEXT_FLUSH_AND_INV | + R600_CONTEXT_WAIT_3D_IDLE; + + if (tex->cmask_size || tex->fmask_size) { + rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_CB_META; + } + break; + } + } + + /* Check a depth buffer. */ + if (rctx->framebuffer.state.zsbuf) { + if (rctx->framebuffer.state.zsbuf->texture == res) { + rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_DB | + R600_CONTEXT_FLUSH_AND_INV | + R600_CONTEXT_WAIT_3D_IDLE; + } + + struct r600_texture *tex = + (struct r600_texture*)rctx->framebuffer.state.zsbuf->texture; + if (tex && tex->htile && &tex->htile->b.b == res) { + rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_DB_META | + R600_CONTEXT_FLUSH_AND_INV | + R600_CONTEXT_WAIT_3D_IDLE; + } + } +} diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 66ea258647d..d5f54c4482e 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -834,6 +834,8 @@ boolean r600_dma_blit(struct pipe_context *ctx, const struct pipe_box *src_box); void r600_emit_streamout_begin(struct r600_context *ctx, struct r600_atom *atom); void r600_emit_streamout_end(struct r600_context *ctx); +void r600_flag_resource_cache_flush(struct r600_context *rctx, + struct pipe_resource *res); /* * evergreen_hw_context.c