From eb19163a4dd3d7bfeed63229820c926f99ed00d9 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Michel=20D=C3=A4nzer?= Date: Thu, 16 May 2013 11:50:00 +0200 Subject: [PATCH] radeonsi: Initial support for multiple constant buffers Just enough to support an additional internal constant buffer for the user clip planes. NOTE: This is a candidate for the 9.1 branch. --- src/gallium/drivers/radeonsi/r600_buffer.c | 30 -------- src/gallium/drivers/radeonsi/radeonsi_pipe.h | 8 ++ src/gallium/drivers/radeonsi/si_state.c | 81 +++++++++----------- src/gallium/drivers/radeonsi/si_state_draw.c | 73 ++++++++++++++++++ 4 files changed, 119 insertions(+), 73 deletions(-) diff --git a/src/gallium/drivers/radeonsi/r600_buffer.c b/src/gallium/drivers/radeonsi/r600_buffer.c index 0c33c1e162f..cdf9988c6e7 100644 --- a/src/gallium/drivers/radeonsi/r600_buffer.c +++ b/src/gallium/drivers/radeonsi/r600_buffer.c @@ -24,7 +24,6 @@ * Jerome Glisse * Corbin Simpson */ -#include <byteswap.h> #include "pipe/p_screen.h" #include "util/u_format.h" @@ -169,32 +168,3 @@ void r600_upload_index_buffer(struct r600_context *rctx, u_upload_data(rctx->uploader, 0, count * ib->index_size, ib->user_buffer, &ib->offset, &ib->buffer); } - -void r600_upload_const_buffer(struct r600_context *rctx, struct si_resource **rbuffer, - const uint8_t *ptr, unsigned size, - uint32_t *const_offset) -{ - *rbuffer = NULL; - - if (R600_BIG_ENDIAN) { - uint32_t *tmpPtr; - unsigned i; - - if (!(tmpPtr = malloc(size))) { - R600_ERR("Failed to allocate BE swap buffer.\n"); - return; - } - - for (i = 0; i < size / 4; ++i) { - tmpPtr[i] = bswap_32(((uint32_t *)ptr)[i]); - } - - u_upload_data(rctx->uploader, 0, size, tmpPtr, const_offset, - (struct pipe_resource**)rbuffer); - - free(tmpPtr); - } else { - u_upload_data(rctx->uploader, 0, size, ptr, const_offset, - (struct pipe_resource**)rbuffer); - } -} diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h index c5b33f721a4..e50088f6e3b 100644 --- 
a/src/gallium/drivers/radeonsi/radeonsi_pipe.h +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h @@ -124,6 +124,13 @@ struct r600_fence_block { #define R600_CONSTANT_ARRAY_SIZE 256 #define R600_RESOURCE_ARRAY_SIZE 160 +struct r600_constbuf_state +{ + struct pipe_constant_buffer cb[2]; + uint32_t enabled_mask; + uint32_t dirty_mask; +}; + struct r600_context { struct pipe_context context; struct blitter_context *blitter; @@ -152,6 +159,7 @@ struct r600_context { /* shader information */ unsigned sprite_coord_enable; unsigned export_16bpc; + struct r600_constbuf_state constbuf_state[PIPE_SHADER_TYPES]; struct r600_textures_info vs_samplers; struct r600_textures_info ps_samplers; struct si_resource *border_color_table; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 6d072ef725a..de86b1e38de 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -24,12 +24,15 @@ * Christian König */ +#include <byteswap.h> + #include "util/u_memory.h" #include "util/u_framebuffer.h" #include "util/u_blitter.h" #include "util/u_helpers.h" #include "util/u_math.h" #include "util/u_pack_color.h" +#include "util/u_upload_mgr.h" #include "util/u_format_s3tc.h" #include "tgsi/tgsi_parse.h" #include "radeonsi_pipe.h" @@ -2492,64 +2495,56 @@ static void si_delete_sampler_state(struct pipe_context *ctx, void *state) * Constants */ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, - struct pipe_constant_buffer *cb) + struct pipe_constant_buffer *input) { struct r600_context *rctx = (struct r600_context *)ctx; - struct si_resource *rbuffer = cb ? si_resource(cb->buffer) : NULL; - struct si_pm4_state *pm4; - uint32_t offset; - uint64_t va; + struct r600_constbuf_state *state = &rctx->constbuf_state[shader]; + struct pipe_constant_buffer *cb; + const uint8_t *ptr; /* Note that the state tracker can unbind constant buffers by * passing NULL here. 
*/ - if (cb == NULL || (!cb->buffer && !cb->user_buffer)) + if (unlikely(!input || (!input->buffer && !input->user_buffer))) { + state->enabled_mask &= ~(1 << index); + state->dirty_mask &= ~(1 << index); + pipe_resource_reference(&state->cb[index].buffer, NULL); return; + } - pm4 = CALLOC_STRUCT(si_pm4_state); - si_pm4_inval_shader_cache(pm4); - - if (cb->user_buffer) - r600_upload_const_buffer(rctx, &rbuffer, cb->user_buffer, cb->buffer_size, &offset); - else - offset = 0; - va = r600_resource_va(ctx->screen, (void*)rbuffer); - va += offset; + cb = &state->cb[index]; + cb->buffer_size = input->buffer_size; - si_pm4_add_bo(pm4, rbuffer, RADEON_USAGE_READ); + ptr = input->user_buffer; - si_pm4_sh_data_begin(pm4); + if (ptr) { + /* Upload the user buffer. */ + if (R600_BIG_ENDIAN) { + uint32_t *tmpPtr; + unsigned i, size = input->buffer_size; - /* Fill in a T# buffer resource description */ - si_pm4_sh_data_add(pm4, va); - si_pm4_sh_data_add(pm4, (S_008F04_BASE_ADDRESS_HI(va >> 32) | - S_008F04_STRIDE(0))); - si_pm4_sh_data_add(pm4, cb->buffer_size); - si_pm4_sh_data_add(pm4, S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32)); - - switch (shader) { - case PIPE_SHADER_VERTEX: - si_pm4_sh_data_end(pm4, R_00B130_SPI_SHADER_USER_DATA_VS_0, SI_SGPR_CONST); - si_pm4_set_state(rctx, vs_const, pm4); - break; + if (!(tmpPtr = malloc(size))) { + R600_ERR("Failed to allocate BE swap buffer.\n"); + return; + } - case PIPE_SHADER_FRAGMENT: - si_pm4_sh_data_end(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0, SI_SGPR_CONST); - si_pm4_set_state(rctx, ps_const, pm4); - break; + for (i = 0; i < size / 4; ++i) { + tmpPtr[i] = bswap_32(((uint32_t *)ptr)[i]); + } - default: - R600_ERR("unsupported %d\n", shader); - FREE(pm4); + u_upload_data(rctx->uploader, 0, size, tmpPtr, 
&cb->buffer_offset, &cb->buffer); + free(tmpPtr); + } else { + u_upload_data(rctx->uploader, 0, input->buffer_size, ptr, &cb->buffer_offset, &cb->buffer); + } + } else { + /* Setup the hw buffer. */ + cb->buffer_offset = input->buffer_offset; + pipe_resource_reference(&cb->buffer, input->buffer); } - if (cb->buffer != &rbuffer->b.b) - si_resource_reference(&rbuffer, NULL); + state->enabled_mask |= 1 << index; + state->dirty_mask |= 1 << index; } /* diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index a9ecc64afa4..1c63b140204 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -431,6 +431,78 @@ static void si_update_derived_state(struct r600_context *rctx) } } +static void si_constant_buffer_update(struct r600_context *rctx) +{ + struct pipe_context *ctx = &rctx->context; + struct si_pm4_state *pm4; + unsigned shader, i; + uint64_t va; + + if (!rctx->constbuf_state[PIPE_SHADER_VERTEX].dirty_mask && + !rctx->constbuf_state[PIPE_SHADER_FRAGMENT].dirty_mask) + return; + + for (shader = PIPE_SHADER_VERTEX ; shader <= PIPE_SHADER_FRAGMENT; shader++) { + struct r600_constbuf_state *state = &rctx->constbuf_state[shader]; + + pm4 = CALLOC_STRUCT(si_pm4_state); + if (!pm4) + continue; + + si_pm4_inval_shader_cache(pm4); + si_pm4_sh_data_begin(pm4); + + for (i = 0; i < 2; i++) { + if (state->enabled_mask & (1 << i)) { + struct pipe_constant_buffer *cb = &state->cb[i]; + struct si_resource *rbuffer = si_resource(cb->buffer); + + va = r600_resource_va(ctx->screen, (void*)rbuffer); + va += cb->buffer_offset; + + si_pm4_add_bo(pm4, rbuffer, RADEON_USAGE_READ); + + /* Fill in a T# buffer resource description */ + si_pm4_sh_data_add(pm4, va); + si_pm4_sh_data_add(pm4, (S_008F04_BASE_ADDRESS_HI(va >> 32) | + S_008F04_STRIDE(0))); + si_pm4_sh_data_add(pm4, cb->buffer_size); + si_pm4_sh_data_add(pm4, S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + 
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32)); + } else { + /* Fill in an empty T# buffer resource description */ + si_pm4_sh_data_add(pm4, 0); + si_pm4_sh_data_add(pm4, 0); + si_pm4_sh_data_add(pm4, 0); + si_pm4_sh_data_add(pm4, 0); + } + } + + switch (shader) { + case PIPE_SHADER_VERTEX: + si_pm4_sh_data_end(pm4, R_00B130_SPI_SHADER_USER_DATA_VS_0, SI_SGPR_CONST); + si_pm4_set_state(rctx, vs_const, pm4); + break; + + case PIPE_SHADER_FRAGMENT: + si_pm4_sh_data_end(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0, SI_SGPR_CONST); + si_pm4_set_state(rctx, ps_const, pm4); + break; + + default: + R600_ERR("unsupported %d\n", shader); + FREE(pm4); + return; + } + + state->dirty_mask = 0; + } +} + static void si_vertex_buffer_update(struct r600_context *rctx) { struct pipe_context *ctx = &rctx->context; @@ -555,6 +627,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) return; si_update_derived_state(rctx); + si_constant_buffer_update(rctx); si_vertex_buffer_update(rctx); if (info->indexed) { -- 2.30.2