r600g: do fine-grained vertex buffer updates
author Marek Olšák <maraeo@gmail.com>
Fri, 6 Jul 2012 01:18:06 +0000 (03:18 +0200)
committer Marek Olšák <maraeo@gmail.com>
Tue, 17 Jul 2012 19:22:14 +0000 (21:22 +0200)
If only some buffers are changed, the other ones don't have to be re-emitted.
This uses bitmasks of enabled and dirty buffers just like
emit_constant_buffers does.

src/gallium/drivers/r600/evergreen_compute.c
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/r600_blit.c
src/gallium/drivers/r600/r600_buffer.c
src/gallium/drivers/r600/r600_hw_context.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_state.c
src/gallium/drivers/r600/r600_state_common.c

index 947a3283b0fdc34835f0ec9d11de3cdef2433443..caaa752b77bc0ddfe9f25f84350c426cf33a6bcc 100644 (file)
@@ -89,14 +89,15 @@ static void evergreen_cs_set_vertex_buffer(
        unsigned offset,
        struct pipe_resource * buffer)
 {
-       struct pipe_vertex_buffer *vb = &rctx->cs_vertex_buffer[vb_index];
-       struct r600_vertexbuf_state * state = &rctx->cs_vertex_buffer_state;
+       struct r600_vertexbuf_state *state = &rctx->cs_vertex_buffer_state;
+       struct pipe_vertex_buffer *vb = &state->vb[vb_index];
        vb->stride = 1;
        vb->buffer_offset = offset;
        vb->buffer = buffer;
        vb->user_buffer = NULL;
 
        r600_inval_vertex_cache(rctx);
+       state->enabled_mask |= 1 << vb_index;
        state->dirty_mask |= 1 << vb_index;
        r600_atom_dirty(rctx, &state->atom);
 }
@@ -369,7 +370,7 @@ static void compute_emit_cs(struct r600_context *ctx)
        r600_context_pipe_state_emit(ctx, cb_state, RADEON_CP_PACKET3_COMPUTE_MODE);
 
        /* Emit vertex buffer state */
-       ctx->cs_vertex_buffer_state.atom.num_dw = 12 * ctx->nr_cs_vertex_buffers;
+       ctx->cs_vertex_buffer_state.atom.num_dw = 12 * util_bitcount(ctx->cs_vertex_buffer_state.dirty_mask);
        r600_emit_atom(ctx, &ctx->cs_vertex_buffer_state.atom);
 
        for (i = 0; i < get_compute_resource_num(); i++) {
@@ -493,10 +494,8 @@ static void evergreen_set_compute_resources(struct pipe_context * ctx_,
                        evergreen_cs_set_vertex_buffer(ctx, vtx_id,
                                        buffer->chunk->start_in_dw * 4,
                                        resources[i]->base.texture);
-                       ctx->nr_cs_vertex_buffers = vtx_id + 1;
                }
        }
-
 }
 
 static void evergreen_set_cs_sampler_view(struct pipe_context *ctx_,
@@ -740,7 +739,8 @@ void evergreen_init_compute_state_functions(struct r600_context *ctx)
 
        /* We always use at least two vertex buffers for compute, one for
          * parameters and one for global memory */
-       ctx->nr_cs_vertex_buffers = 2;
+       ctx->cs_vertex_buffer_state.enabled_mask =
+       ctx->cs_vertex_buffer_state.dirty_mask = 1 | 2;
 }
 
 
index 0d2fa3045a44768afd343f38ccf0e75f8359f405..72ddc0b6c7bd308c58822be091d1b46e0b68e098 100644 (file)
@@ -1772,8 +1772,6 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_
 
 static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
                                          struct r600_vertexbuf_state *state,
-                                         struct pipe_vertex_buffer *vertex_buffers,
-                                         unsigned vb_count,
                                          unsigned resource_offset,
                                          unsigned pkt_flags)
 {
@@ -1784,13 +1782,11 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
                struct pipe_vertex_buffer *vb;
                struct r600_resource *rbuffer;
                uint64_t va;
-               unsigned buffer_index = ffs(dirty_mask) - 1;
+               unsigned buffer_index = u_bit_scan(&dirty_mask);
 
-               vb = &vertex_buffers[buffer_index];
+               vb = &state->vb[buffer_index];
                rbuffer = (struct r600_resource*)vb->buffer;
-               if (!rbuffer) {
-                       goto next;
-               }
+               assert(rbuffer);
 
                va = r600_resource_va(&rctx->screen->screen, &rbuffer->b.b);
                va += vb->buffer_offset;
@@ -1816,26 +1812,19 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
 
                r600_write_value(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
                r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ));
-
-next:
-               dirty_mask &= ~(1 << buffer_index);
        }
        state->dirty_mask = 0;
 }
 
 static void evergreen_fs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
 {
-       evergreen_emit_vertex_buffers(rctx, &rctx->vertex_buffer_state,
-                                       rctx->vertex_buffer,
-                                       rctx->nr_vertex_buffers, 992, 0);
+       evergreen_emit_vertex_buffers(rctx, &rctx->vertex_buffer_state, 992, 0);
 }
 
 static void evergreen_cs_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom * atom)
 {
-       evergreen_emit_vertex_buffers(rctx, &rctx->cs_vertex_buffer_state,
-                                       rctx->cs_vertex_buffer,
-                                       rctx->nr_cs_vertex_buffers, 816,
-                                       RADEON_CP_PACKET3_COMPUTE_MODE);
+       evergreen_emit_vertex_buffers(rctx, &rctx->cs_vertex_buffer_state, 816,
+                                     RADEON_CP_PACKET3_COMPUTE_MODE);
 }
 
 static void evergreen_emit_constant_buffers(struct r600_context *rctx,
index 98f8b84a51a65b5c789ecb6223fc32aa2440944b..ca5aaf8c0b8836cad8d9a70f6ed85dd3c912fcbd 100644 (file)
@@ -60,8 +60,8 @@ static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op
                util_blitter_save_viewport(rctx->blitter, &rctx->viewport);
        }
        util_blitter_save_vertex_buffers(rctx->blitter,
-                                        rctx->nr_vertex_buffers,
-                                        rctx->vertex_buffer);
+                                        util_last_bit(rctx->vertex_buffer_state.enabled_mask),
+                                        rctx->vertex_buffer_state.vb);
        util_blitter_save_so_targets(rctx->blitter, rctx->num_so_targets,
                                     (struct pipe_stream_output_target**)rctx->so_targets);
 
index 8e2deb1ea7cf72d82bb9460593f1dcdfaf2b612f..165427eddf403f04563f1ba6dbb49884b982ba7e 100644 (file)
@@ -93,7 +93,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe,
                /* Check if mapping this buffer would cause waiting for the GPU. */
                if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, RADEON_USAGE_READWRITE) ||
                    rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) {
-                       unsigned i;
+                       unsigned i, mask;
 
                        /* Discard the buffer. */
                        pb_reference(&rbuffer->buf, NULL);
@@ -105,13 +105,12 @@ static void *r600_buffer_transfer_map(struct pipe_context *pipe,
 
                        /* We changed the buffer, now we need to bind it where the old one was bound. */
                        /* Vertex buffers. */
-                       for (i = 0; i < rctx->nr_vertex_buffers; i++) {
-                               if (rctx->vertex_buffer[i].buffer == &rbuffer->b.b) {
-                                       struct r600_vertexbuf_state * state =
-                                               &rctx->vertex_buffer_state;
-                                       state->dirty_mask |= 1 << i;
-                                       r600_inval_vertex_cache(rctx);
-                                       r600_atom_dirty(rctx, &state->atom);
+                       mask = rctx->vertex_buffer_state.enabled_mask;
+                       while (mask) {
+                               i = u_bit_scan(&mask);
+                               if (rctx->vertex_buffer_state.vb[i].buffer == &rbuffer->b.b) {
+                                       rctx->vertex_buffer_state.dirty_mask |= 1 << i;
+                                       r600_vertex_buffers_dirty(rctx);
                                }
                        }
                        /* Streamout buffers. */
index 2951b86b7756aef9201daf7a417c44c6cd695861..d0a5918d1fc6dccbaa7db97c4f2827b606bba526 100644 (file)
@@ -1274,14 +1274,15 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
        r600_emit_atom(ctx, &ctx->start_cs_cmd.atom);
 
        /* Invalidate caches. */
-       r600_inval_vertex_cache(ctx);
        r600_inval_texture_cache(ctx);
        r600_flush_framebuffer(ctx, false);
 
        /* Re-emit states. */
        r600_atom_dirty(ctx, &ctx->cb_misc_state.atom);
        r600_atom_dirty(ctx, &ctx->db_misc_state.atom);
-       r600_atom_dirty(ctx, &ctx->vertex_buffer_state.atom);
+
+       ctx->vertex_buffer_state.dirty_mask = ctx->vertex_buffer_state.enabled_mask;
+       r600_vertex_buffers_dirty(ctx);
 
        ctx->vs_constbuf_state.dirty_mask = ctx->vs_constbuf_state.enabled_mask;
        ctx->ps_constbuf_state.dirty_mask = ctx->ps_constbuf_state.enabled_mask;
index 6449a4d4042912668b6e847c436ba3a21d388867..200f0a2ca59fec381bcb0d568843cff3ed7fa967 100644 (file)
@@ -278,6 +278,8 @@ struct r600_constbuf_state
 struct r600_vertexbuf_state
 {
        struct r600_atom                atom;
+       struct pipe_vertex_buffer       vb[PIPE_MAX_ATTRIBS];
+       uint32_t                        enabled_mask; /* non-NULL buffers */
        uint32_t                        dirty_mask;
 };
 
@@ -399,13 +401,8 @@ struct r600_context {
 
        boolean                 dual_src_blend;
 
-       /* Vertex and index buffers. */
-       bool                    vertex_buffers_dirty;
+       /* Index buffer. */
        struct pipe_index_buffer index_buffer;
-       struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
-       unsigned                nr_vertex_buffers;
-       struct pipe_vertex_buffer cs_vertex_buffer[PIPE_MAX_ATTRIBS];
-       unsigned                nr_cs_vertex_buffers;
 };
 
 static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom)
@@ -528,8 +525,9 @@ unsigned r600_get_cb_flush_flags(struct r600_context *rctx);
 void r600_texture_barrier(struct pipe_context *ctx);
 void r600_set_index_buffer(struct pipe_context *ctx,
                           const struct pipe_index_buffer *ib);
+void r600_vertex_buffers_dirty(struct r600_context *rctx);
 void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
-                            const struct pipe_vertex_buffer *buffers);
+                            const struct pipe_vertex_buffer *input);
 void *r600_create_vertex_elements(struct pipe_context *ctx,
                                  unsigned count,
                                  const struct pipe_vertex_element *elements);
index 3d5835cfcdd0a4646c332facc9f830527aa7dfb1..4f475b3d8d4677a17b738b5c1a6b8518c34d89c6 100644 (file)
@@ -1748,27 +1748,28 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom
 static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom)
 {
        struct radeon_winsys_cs *cs = rctx->cs;
-       struct pipe_vertex_buffer *vb = rctx->vertex_buffer;
-       unsigned count = rctx->nr_vertex_buffers;
-       unsigned i, offset;
+       uint32_t dirty_mask = rctx->vertex_buffer_state.dirty_mask;
 
-       for (i = 0; i < count; i++) {
-               struct r600_resource *rbuffer = (struct r600_resource*)vb[i].buffer;
+       while (dirty_mask) {
+               struct pipe_vertex_buffer *vb;
+               struct r600_resource *rbuffer;
+               unsigned offset;
+               unsigned buffer_index = u_bit_scan(&dirty_mask);
 
-               if (!rbuffer) {
-                       continue;
-               }
+               vb = &rctx->vertex_buffer_state.vb[buffer_index];
+               rbuffer = (struct r600_resource*)vb->buffer;
+               assert(rbuffer);
 
-               offset = vb[i].buffer_offset;
+               offset = vb->buffer_offset;
 
                /* fetch resources start at index 320 */
                r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
-               r600_write_value(cs, (320 + i) * 7);
+               r600_write_value(cs, (320 + buffer_index) * 7);
                r600_write_value(cs, offset); /* RESOURCEi_WORD0 */
                r600_write_value(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */
                r600_write_value(cs, /* RESOURCEi_WORD2 */
                                 S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
-                                S_038008_STRIDE(vb[i].stride));
+                                S_038008_STRIDE(vb->stride));
                r600_write_value(cs, 0); /* RESOURCEi_WORD3 */
                r600_write_value(cs, 0); /* RESOURCEi_WORD4 */
                r600_write_value(cs, 0); /* RESOURCEi_WORD5 */
index 3c42a44764312b060241540c33c16a2f1bacf672..4fa269919a130d2786a356d0700ae062ff5f1ed5 100644 (file)
@@ -403,22 +403,58 @@ void r600_set_index_buffer(struct pipe_context *ctx,
        }
 }
 
+void r600_vertex_buffers_dirty(struct r600_context *rctx)
+{
+       if (rctx->vertex_buffer_state.dirty_mask) {
+               r600_inval_vertex_cache(rctx);
+               rctx->vertex_buffer_state.atom.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 11) *
+                                              util_bitcount(rctx->vertex_buffer_state.dirty_mask);
+               r600_atom_dirty(rctx, &rctx->vertex_buffer_state.atom);
+       }
+}
+
 void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count,
-                            const struct pipe_vertex_buffer *buffers)
+                            const struct pipe_vertex_buffer *input)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
-       struct r600_vertexbuf_state * state = &rctx->vertex_buffer_state;
+       struct r600_vertexbuf_state *state = &rctx->vertex_buffer_state;
+       struct pipe_vertex_buffer *vb = state->vb;
        unsigned i;
+       /* This sets 1-bit for buffers with index >= count. */
+       uint32_t disable_mask = ~((1ull << count) - 1);
+       /* These are the new buffers set by this function. */
+       uint32_t new_buffer_mask = 0;
+
+       /* Set buffers with index >= count to NULL. */
+       uint32_t remaining_buffers_mask =
+               rctx->vertex_buffer_state.enabled_mask & disable_mask;
+
+       while (remaining_buffers_mask) {
+               i = u_bit_scan(&remaining_buffers_mask);
+               pipe_resource_reference(&vb[i].buffer, NULL);
+       }
 
-       util_copy_vertex_buffers(rctx->vertex_buffer, &rctx->nr_vertex_buffers, buffers, count);
+       /* Set vertex buffers. */
+       for (i = 0; i < count; i++) {
+               if (memcmp(&input[i], &vb[i], sizeof(struct pipe_vertex_buffer))) {
+                       if (input[i].buffer) {
+                               vb[i].stride = input[i].stride;
+                               vb[i].buffer_offset = input[i].buffer_offset;
+                               pipe_resource_reference(&vb[i].buffer, input[i].buffer);
+                               new_buffer_mask |= 1 << i;
+                       } else {
+                               pipe_resource_reference(&vb[i].buffer, NULL);
+                               disable_mask |= 1 << i;
+                       }
+               }
+        }
 
-       r600_inval_vertex_cache(rctx);
-       state->atom.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 11) *
-                                          rctx->nr_vertex_buffers;
-       for (i = 0 ; i < rctx->nr_vertex_buffers; i++) {
-               state->dirty_mask |= 1 << i;
-       }
-       r600_atom_dirty(rctx, &state->atom);
+       rctx->vertex_buffer_state.enabled_mask &= ~disable_mask;
+       rctx->vertex_buffer_state.dirty_mask &= rctx->vertex_buffer_state.enabled_mask;
+       rctx->vertex_buffer_state.enabled_mask |= new_buffer_mask;
+       rctx->vertex_buffer_state.dirty_mask |= new_buffer_mask;
+
+       r600_vertex_buffers_dirty(rctx);
 }
 
 void *r600_create_vertex_elements(struct pipe_context *ctx,