r600g: suballocate memory for the STRMOUT_BUFFER_FILLED_SIZE register
author Marek Olšák <maraeo@gmail.com>
Sun, 9 Dec 2012 16:56:26 +0000 (17:56 +0100)
committer Marek Olšák <maraeo@gmail.com>
Wed, 12 Dec 2012 12:12:28 +0000 (13:12 +0100)
Instead of having a 4-byte buffer for each streamout target, we suballocate
each dword from a 4K buffer.

This further reduces the overall number of relocations.

Tested-by: Aaron Watry <awatry@gmail.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
src/gallium/drivers/r600/r600.h
src/gallium/drivers/r600/r600_hw_context.c
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_state_common.c

index 7d434169d9ea9c6847214999fa12b1bb6dcc3946..d15cd5256fad267d1d413d0d844552c93419da0f 100644 (file)
@@ -175,7 +175,9 @@ struct r600_so_target {
        struct pipe_stream_output_target b;
 
        /* The buffer where BUFFER_FILLED_SIZE is stored. */
-       struct r600_resource    *filled_size;
+       struct r600_resource    *buf_filled_size;
+       unsigned                buf_filled_size_offset;
+
        unsigned                stride_in_dw;
        unsigned                so_index;
 };
index c825301844655e5de3e7617d2620a4311fbceba5..c7a357e15ed13cc0a9380e3fedc800c4e1f802ff 100644 (file)
@@ -1005,7 +1005,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
 
                        if (ctx->streamout_append_bitmask & (1 << i)) {
                                va = r600_resource_va(&ctx->screen->screen,
-                                                     (void*)t[i]->filled_size);
+                                                     (void*)t[i]->buf_filled_size) + t[i]->buf_filled_size_offset;
                                /* Append. */
                                cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
                                cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
@@ -1017,7 +1017,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
 
                                cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
                                cs->buf[cs->cdw++] =
-                                       r600_context_bo_reloc(ctx,  t[i]->filled_size,
+                                       r600_context_bo_reloc(ctx,  t[i]->buf_filled_size,
                                                              RADEON_USAGE_READ);
                        } else {
                                /* Start from the beginning. */
@@ -1054,7 +1054,7 @@ void r600_context_streamout_end(struct r600_context *ctx)
        for (i = 0; i < ctx->num_so_targets; i++) {
                if (t[i]) {
                        va = r600_resource_va(&ctx->screen->screen,
-                                             (void*)t[i]->filled_size);
+                                             (void*)t[i]->buf_filled_size) + t[i]->buf_filled_size_offset;
                        cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
                        cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
                                                       STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
@@ -1066,7 +1066,7 @@ void r600_context_streamout_end(struct r600_context *ctx)
 
                        cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
                        cs->buf[cs->cdw++] =
-                               r600_context_bo_reloc(ctx,  t[i]->filled_size,
+                               r600_context_bo_reloc(ctx,  t[i]->buf_filled_size,
                                                      RADEON_USAGE_WRITE);
 
                }
index deed4ec3c10b91f48a08ed1ea250d17a1bc808a1..90289e582abb8524bc1572e63805bd655dd807ce 100644 (file)
@@ -185,6 +185,9 @@ static void r600_destroy_context(struct pipe_context *context)
        if (rctx->uploader) {
                u_upload_destroy(rctx->uploader);
        }
+       if (rctx->allocator_so_filled_size) {
+               u_suballocator_destroy(rctx->allocator_so_filled_size);
+       }
        util_slab_destroy(&rctx->pool_transfers);
 
        r600_release_command_buffer(&rctx->start_cs_cmd);
@@ -291,6 +294,11 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
         if (!rctx->uploader)
                 goto fail;
 
+       rctx->allocator_so_filled_size = u_suballocator_create(&rctx->context, 4096, 4,
+                                                               0, PIPE_USAGE_STATIC, TRUE);
+        if (!rctx->allocator_so_filled_size)
+                goto fail;
+
        rctx->blitter = util_blitter_create(&rctx->context);
        if (rctx->blitter == NULL)
                goto fail;
index a61a6e8c08260f495f3bbd560bcafb59b332dd69..e707a4adda6015ba8c26e851c964220ee1211cb4 100644 (file)
@@ -28,6 +28,7 @@
 
 #include "util/u_blitter.h"
 #include "util/u_slab.h"
+#include "util/u_suballoc.h"
 #include "r600.h"
 #include "r600_llvm.h"
 #include "r600_public.h"
@@ -389,6 +390,7 @@ struct r600_context {
        struct radeon_winsys_cs         *cs;
        struct blitter_context          *blitter;
        struct u_upload_mgr             *uploader;
+       struct u_suballocator           *allocator_so_filled_size;
        struct util_slab_mempool        pool_transfers;
 
        /* Hardware info. */
index b132850f0c783405b1db412e3ff944c38ccc7c4c..66120cad27dd8f2707e1aa4fab7a70ea046c6399 100644 (file)
@@ -956,25 +956,25 @@ r600_create_so_target(struct pipe_context *ctx,
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
        struct r600_so_target *t;
-       void *ptr;
 
        t = CALLOC_STRUCT(r600_so_target);
        if (!t) {
                return NULL;
        }
 
+       u_suballocator_alloc(rctx->allocator_so_filled_size, 4,
+                            &t->buf_filled_size_offset,
+                            (struct pipe_resource**)&t->buf_filled_size);
+       if (!t->buf_filled_size) {
+               FREE(t);
+               return NULL;
+       }
+
        t->b.reference.count = 1;
        t->b.context = ctx;
        pipe_resource_reference(&t->b.buffer, buffer);
        t->b.buffer_offset = buffer_offset;
        t->b.buffer_size = buffer_size;
-
-       t->filled_size = (struct r600_resource*)
-               pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_STATIC, 4);
-       ptr = rctx->ws->buffer_map(t->filled_size->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
-       memset(ptr, 0, t->filled_size->buf->size);
-       rctx->ws->buffer_unmap(t->filled_size->cs_buf);
-
        return &t->b;
 }
 
@@ -983,7 +983,7 @@ static void r600_so_target_destroy(struct pipe_context *ctx,
 {
        struct r600_so_target *t = (struct r600_so_target*)target;
        pipe_resource_reference(&t->b.buffer, NULL);
-       pipe_resource_reference((struct pipe_resource**)&t->filled_size, NULL);
+       pipe_resource_reference((struct pipe_resource**)&t->buf_filled_size, NULL);
        FREE(t);
 }
 
@@ -1308,7 +1308,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
        } else {
                if (info.count_from_stream_output) {
                        struct r600_so_target *t = (struct r600_so_target*)info.count_from_stream_output;
-                       uint64_t va = r600_resource_va(&rctx->screen->screen, (void*)t->filled_size);
+                       uint64_t va = r600_resource_va(&rctx->screen->screen, (void*)t->buf_filled_size) + t->buf_filled_size_offset;
 
                        r600_write_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw);
 
@@ -1320,7 +1320,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
                        cs->buf[cs->cdw++] = 0; /* unused */
 
                        cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
-                       cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, t->filled_size, RADEON_USAGE_READ);
+                       cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, t->buf_filled_size, RADEON_USAGE_READ);
                }
 
                cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, rctx->predicate_drawing);