From 8df3855eed67302e83e4b181c4fa02183ccc185a Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 9 Dec 2012 17:56:26 +0100 Subject: [PATCH] r600g: suballocate memory for the STRMOUT_BUFFER_FILLED_SIZE register Instead of having a 4-byte buffer for each streamout target, we suballocate each dword from a 4K buffer. This further reduces the overall number of relocations. Tested-by: Aaron Watry Reviewed-by: Alex Deucher --- src/gallium/drivers/r600/r600.h | 4 +++- src/gallium/drivers/r600/r600_hw_context.c | 8 +++---- src/gallium/drivers/r600/r600_pipe.c | 8 +++++++ src/gallium/drivers/r600/r600_pipe.h | 2 ++ src/gallium/drivers/r600/r600_state_common.c | 22 ++++++++++---------- 5 files changed, 28 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 7d434169d9e..d15cd5256fa 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -175,7 +175,9 @@ struct r600_so_target { struct pipe_stream_output_target b; /* The buffer where BUFFER_FILLED_SIZE is stored. */ - struct r600_resource *filled_size; + struct r600_resource *buf_filled_size; + unsigned buf_filled_size_offset; + unsigned stride_in_dw; unsigned so_index; }; diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index c8253018446..c7a357e15ed 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -1005,7 +1005,7 @@ void r600_context_streamout_begin(struct r600_context *ctx) if (ctx->streamout_append_bitmask & (1 << i)) { va = r600_resource_va(&ctx->screen->screen, - (void*)t[i]->filled_size); + (void*)t[i]->buf_filled_size) + t[i]->buf_filled_size_offset; /* Append. */ cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0); cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) | @@ -1017,7 +1017,7 @@ void r600_context_streamout_begin(struct r600_context *ctx) cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); cs->buf[cs->cdw++] = - r600_context_bo_reloc(ctx, t[i]->filled_size, + r600_context_bo_reloc(ctx, t[i]->buf_filled_size, RADEON_USAGE_READ); } else { /* Start from the beginning. */ @@ -1054,7 +1054,7 @@ void r600_context_streamout_end(struct r600_context *ctx) for (i = 0; i < ctx->num_so_targets; i++) { if (t[i]) { va = r600_resource_va(&ctx->screen->screen, - (void*)t[i]->filled_size); + (void*)t[i]->buf_filled_size) + t[i]->buf_filled_size_offset; cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0); cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) | STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) | @@ -1066,7 +1066,7 @@ void r600_context_streamout_end(struct r600_context *ctx) cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); cs->buf[cs->cdw++] = - r600_context_bo_reloc(ctx, t[i]->filled_size, + r600_context_bo_reloc(ctx, t[i]->buf_filled_size, RADEON_USAGE_WRITE); } diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index deed4ec3c10..90289e582ab 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -185,6 +185,9 @@ static void r600_destroy_context(struct pipe_context *context) if (rctx->uploader) { u_upload_destroy(rctx->uploader); } + if (rctx->allocator_so_filled_size) { + u_suballocator_destroy(rctx->allocator_so_filled_size); + } util_slab_destroy(&rctx->pool_transfers); r600_release_command_buffer(&rctx->start_cs_cmd); @@ -291,6 +294,11 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void if (!rctx->uploader) goto fail; + rctx->allocator_so_filled_size = u_suballocator_create(&rctx->context, 4096, 4, + 0, PIPE_USAGE_STATIC, TRUE); + if (!rctx->allocator_so_filled_size) + goto fail; + rctx->blitter = util_blitter_create(&rctx->context); if (rctx->blitter == NULL) goto fail; diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index a61a6e8c082..e707a4adda6 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -28,6 +28,7 @@ #include "util/u_blitter.h" #include "util/u_slab.h" +#include "util/u_suballoc.h" #include "r600.h" #include "r600_llvm.h" #include "r600_public.h" @@ -389,6 +390,7 @@ struct r600_context { struct radeon_winsys_cs *cs; struct blitter_context *blitter; struct u_upload_mgr *uploader; + struct u_suballocator *allocator_so_filled_size; struct util_slab_mempool pool_transfers; /* Hardware info. */ diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index b132850f0c7..66120cad27d 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -956,25 +956,25 @@ r600_create_so_target(struct pipe_context *ctx, { struct r600_context *rctx = (struct r600_context *)ctx; struct r600_so_target *t; - void *ptr; t = CALLOC_STRUCT(r600_so_target); if (!t) { return NULL; } + u_suballocator_alloc(rctx->allocator_so_filled_size, 4, + &t->buf_filled_size_offset, + (struct pipe_resource**)&t->buf_filled_size); + if (!t->buf_filled_size) { + FREE(t); + return NULL; + } + t->b.reference.count = 1; t->b.context = ctx; pipe_resource_reference(&t->b.buffer, buffer); t->b.buffer_offset = buffer_offset; t->b.buffer_size = buffer_size; - - t->filled_size = (struct r600_resource*) - pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_STATIC, 4); - ptr = rctx->ws->buffer_map(t->filled_size->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE); - memset(ptr, 0, t->filled_size->buf->size); - rctx->ws->buffer_unmap(t->filled_size->cs_buf); - return &t->b; } @@ -983,7 +983,7 @@ static void r600_so_target_destroy(struct pipe_context *ctx, { struct r600_so_target *t = (struct r600_so_target*)target; pipe_resource_reference(&t->b.buffer, NULL); - pipe_resource_reference((struct pipe_resource**)&t->filled_size, NULL); + pipe_resource_reference((struct pipe_resource**)&t->buf_filled_size, NULL); FREE(t); } @@ -1308,7 +1308,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info } else { if (info.count_from_stream_output) { struct r600_so_target *t = (struct r600_so_target*)info.count_from_stream_output; - uint64_t va = r600_resource_va(&rctx->screen->screen, (void*)t->filled_size); + uint64_t va = r600_resource_va(&rctx->screen->screen, (void*)t->buf_filled_size) + t->buf_filled_size_offset; r600_write_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw); @@ -1320,7 +1320,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info cs->buf[cs->cdw++] = 0; /* unused */ cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, t->filled_size, RADEON_USAGE_READ); + cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, t->buf_filled_size, RADEON_USAGE_READ); } cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, rctx->predicate_drawing); -- 2.30.2