r600g: only flush the caches that need to be flushed during CP DMA operations
author Marek Olšák <maraeo@gmail.com>
Sun, 30 Jun 2013 17:57:59 +0000 (19:57 +0200)
committer Marek Olšák <maraeo@gmail.com>
Mon, 8 Jul 2013 18:25:18 +0000 (20:25 +0200)
This should increase performance if constant uploads are done with the CP DMA,
because only the cache that needs to be flushed is flushed.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
src/gallium/drivers/r600/evergreen_hw_context.c
src/gallium/drivers/r600/r600_hw_context.c
src/gallium/drivers/r600/r600_pipe.h

index 075ab17a557be5aadd8d082d923d24a85496cc33..c428bc1f6cc32f7a96ed36a4f9a61e0097064846 100644 (file)
@@ -121,18 +121,8 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
 
        offset += r600_resource_va(&rctx->screen->screen, dst);
 
-       /* We flush the caches, because we might read from or write
-        * to resources which are bound right now. */
-       rctx->flags |= R600_CONTEXT_INV_CONST_CACHE |
-                      R600_CONTEXT_INV_VERTEX_CACHE |
-                      R600_CONTEXT_INV_TEX_CACHE |
-                      R600_CONTEXT_FLUSH_AND_INV |
-                      R600_CONTEXT_FLUSH_AND_INV_CB |
-                      R600_CONTEXT_FLUSH_AND_INV_DB |
-                      R600_CONTEXT_FLUSH_AND_INV_CB_META |
-                      R600_CONTEXT_FLUSH_AND_INV_DB_META |
-                      R600_CONTEXT_STREAMOUT_FLUSH |
-                      R600_CONTEXT_WAIT_3D_IDLE;
+       /* Flush the cache where the resource is bound. */
+       r600_flag_resource_cache_flush(rctx, dst);
 
        while (size) {
                unsigned sync = 0;
@@ -169,10 +159,9 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
                offset += byte_count;
        }
 
-       /* Invalidate the read caches. */
-       rctx->flags |= R600_CONTEXT_INV_CONST_CACHE |
-                      R600_CONTEXT_INV_VERTEX_CACHE |
-                      R600_CONTEXT_INV_TEX_CACHE;
+       /* Flush the cache again in case the 3D engine has been prefetching
+        * the resource. */
+       r600_flag_resource_cache_flush(rctx, dst);
 
        util_range_add(&r600_resource(dst)->valid_buffer_range, offset,
                       offset + size);
index ff36573ab5eaec7ec6e8c73a09ae716701432024..e2444cc5cdc482e90dcdc7f45f41a0d0e9e6f021 100644 (file)
@@ -626,18 +626,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
        dst_offset += r600_resource_va(&rctx->screen->screen, dst);
        src_offset += r600_resource_va(&rctx->screen->screen, src);
 
-       /* We flush the caches, because we might read from or write
-        * to resources which are bound right now. */
-       rctx->flags |= R600_CONTEXT_INV_CONST_CACHE |
-                      R600_CONTEXT_INV_VERTEX_CACHE |
-                      R600_CONTEXT_INV_TEX_CACHE |
-                      R600_CONTEXT_FLUSH_AND_INV |
-                      R600_CONTEXT_FLUSH_AND_INV_CB |
-                      R600_CONTEXT_FLUSH_AND_INV_DB |
-                      R600_CONTEXT_FLUSH_AND_INV_CB_META |
-                      R600_CONTEXT_FLUSH_AND_INV_DB_META |
-                      R600_CONTEXT_STREAMOUT_FLUSH |
-                      R600_CONTEXT_WAIT_3D_IDLE;
+       /* Flush the caches where the resources are bound. */
+       r600_flag_resource_cache_flush(rctx, src);
+       r600_flag_resource_cache_flush(rctx, dst);
 
        /* There are differences between R700 and EG in CP DMA,
         * but we only use the common bits here. */
@@ -679,10 +670,9 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
                dst_offset += byte_count;
        }
 
-       /* Invalidate the read caches. */
-       rctx->flags |= R600_CONTEXT_INV_CONST_CACHE |
-                      R600_CONTEXT_INV_VERTEX_CACHE |
-                      R600_CONTEXT_INV_TEX_CACHE;
+       /* Flush the cache of the dst resource again in case the 3D engine
+        * has been prefetching it. */
+       r600_flag_resource_cache_flush(rctx, dst);
 
        util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset,
                       dst_offset + size);
@@ -736,3 +726,107 @@ void r600_dma_copy(struct r600_context *rctx,
        util_range_add(&rdst->valid_buffer_range, dst_offset,
                       dst_offset + size);
 }
+
+/* Flag the cache of the resource for it to be flushed later if the resource
+ * is bound. Otherwise do nothing. Used for synchronization between engines.
+ */
+void r600_flag_resource_cache_flush(struct r600_context *rctx,
+                                   struct pipe_resource *res)
+{
+       /* Check vertex buffers. */
+       uint32_t mask = rctx->vertex_buffer_state.enabled_mask;
+       while (mask) {
+               uint32_t i = u_bit_scan(&mask);
+               if (rctx->vertex_buffer_state.vb[i].buffer == res) {
+                       rctx->flags |= R600_CONTEXT_INV_VERTEX_CACHE;
+               }
+       }
+
+       /* Check vertex buffers for compute. */
+       mask = rctx->cs_vertex_buffer_state.enabled_mask;
+       while (mask) {
+               uint32_t i = u_bit_scan(&mask);
+               if (rctx->cs_vertex_buffer_state.vb[i].buffer == res) {
+                       rctx->flags |= R600_CONTEXT_INV_VERTEX_CACHE;
+               }
+       }
+
+       /* Check constant buffers. */
+       unsigned shader;
+       for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) {
+               struct r600_constbuf_state *state = &rctx->constbuf_state[shader];
+               uint32_t mask = state->enabled_mask;
+
+               while (mask) {
+                       unsigned i = u_bit_scan(&mask);
+                       if (state->cb[i].buffer == res) {
+                               rctx->flags |= R600_CONTEXT_INV_CONST_CACHE;
+
+                               shader = PIPE_SHADER_TYPES; /* break the outer loop */
+                               break;
+                       }
+               }
+       }
+
+       /* Check textures. */
+       for (shader = 0; shader < PIPE_SHADER_TYPES; shader++) {
+               struct r600_samplerview_state *state = &rctx->samplers[shader].views;
+               uint32_t mask = state->enabled_mask;
+
+               while (mask) {
+                       uint32_t i = u_bit_scan(&mask);
+                       if (&state->views[i]->tex_resource->b.b == res) {
+                               rctx->flags |= R600_CONTEXT_INV_TEX_CACHE;
+
+                               shader = PIPE_SHADER_TYPES; /* break the outer loop */
+                               break;
+                       }
+               }
+       }
+
+       /* Check streamout buffers. */
+       int i;
+       for (i = 0; i < rctx->streamout.num_targets; i++) {
+               if (rctx->streamout.targets[i]->b.buffer == res) {
+                       rctx->flags |= R600_CONTEXT_STREAMOUT_FLUSH |
+                                      R600_CONTEXT_FLUSH_AND_INV |
+                                      R600_CONTEXT_WAIT_3D_IDLE;
+                       break;
+               }
+       }
+
+       /* Check colorbuffers. */
+       for (i = 0; i < rctx->framebuffer.state.nr_cbufs; i++) {
+               if (rctx->framebuffer.state.cbufs[i] &&
+                   rctx->framebuffer.state.cbufs[i]->texture == res) {
+                       struct r600_texture *tex =
+                               (struct r600_texture*)rctx->framebuffer.state.cbufs[i]->texture;
+
+                       rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_CB |
+                                      R600_CONTEXT_FLUSH_AND_INV |
+                                      R600_CONTEXT_WAIT_3D_IDLE;
+
+                       if (tex->cmask_size || tex->fmask_size) {
+                               rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_CB_META;
+                       }
+                       break;
+               }
+       }
+
+       /* Check a depth buffer. */
+       if (rctx->framebuffer.state.zsbuf) {
+               if (rctx->framebuffer.state.zsbuf->texture == res) {
+                       rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_DB |
+                                      R600_CONTEXT_FLUSH_AND_INV |
+                                      R600_CONTEXT_WAIT_3D_IDLE;
+               }
+
+               struct r600_texture *tex =
+                       (struct r600_texture*)rctx->framebuffer.state.zsbuf->texture;
+               if (tex && tex->htile && &tex->htile->b.b == res) {
+                       rctx->flags |= R600_CONTEXT_FLUSH_AND_INV_DB_META |
+                                      R600_CONTEXT_FLUSH_AND_INV |
+                                      R600_CONTEXT_WAIT_3D_IDLE;
+               }
+       }
+}
index 66ea258647de6d637e2c91f5ffdb56699c0e0eaf..d5f54c4482efb228001593543d7c0f6e650549bb 100644 (file)
@@ -834,6 +834,8 @@ boolean r600_dma_blit(struct pipe_context *ctx,
                        const struct pipe_box *src_box);
 void r600_emit_streamout_begin(struct r600_context *ctx, struct r600_atom *atom);
 void r600_emit_streamout_end(struct r600_context *ctx);
+void r600_flag_resource_cache_flush(struct r600_context *rctx,
+                                   struct pipe_resource *res);
 
 /*
  * evergreen_hw_context.c