From aa5c40f97cf5d0609dfb8c0792eca5f6d5108579 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 17 Aug 2013 19:19:30 +0200 Subject: [PATCH] radeonsi: convert constant buffers to si_descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit There is a new "class" si_buffer_resources, which should be good enough for implementing any kind of buffer bindings (constant buffers, vertex buffers, streamout buffers, shader storage buffers, etc.) I don't even keep a copy of pipe_constant_buffer - we don't need it. The main motivation behind this is to have a well-tested infrastructure for setting up streamout buffers. Reviewed-by: Michel Dänzer Reviewed-by: Christian König Tested-by: Tom Stellard --- src/gallium/drivers/radeonsi/radeonsi_pipe.h | 10 +- src/gallium/drivers/radeonsi/si_descriptors.c | 143 +++++++++++++++++- src/gallium/drivers/radeonsi/si_state.c | 42 ----- src/gallium/drivers/radeonsi/si_state.h | 15 +- src/gallium/drivers/radeonsi/si_state_draw.c | 80 +--------- 5 files changed, 162 insertions(+), 128 deletions(-) diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h index 517925d8650..9d84e613e51 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h @@ -115,13 +115,6 @@ struct r600_fence_block { struct list_head head; }; -struct r600_constbuf_state -{ - struct pipe_constant_buffer cb[2]; - uint32_t enabled_mask; - uint32_t dirty_mask; -}; - #define SI_NUM_ATOMS(rctx) (sizeof((rctx)->atoms)/sizeof((rctx)->atoms.array[0])) #define SI_NUM_SHADERS (PIPE_SHADER_FRAGMENT+1) @@ -138,6 +131,7 @@ struct r600_context { union { struct { + struct r600_atom *const_buffers[SI_NUM_SHADERS]; struct r600_atom *sampler_views[SI_NUM_SHADERS]; }; struct r600_atom *array[0]; @@ -164,7 +158,7 @@ struct r600_context { /* shader information */ unsigned sprite_coord_enable; unsigned export_16bpc; - struct
r600_constbuf_state constbuf_state[PIPE_SHADER_TYPES]; + struct si_buffer_resources const_buffers[SI_NUM_SHADERS]; struct r600_textures_info samplers[SI_NUM_SHADERS]; struct r600_resource *border_color_table; unsigned border_color_offset; diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index db0da754036..2983d754efd 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -32,7 +32,7 @@ #define SI_NUM_CONTEXTS 256 -static const uint32_t null_desc[8]; /* zeros */ +static uint32_t null_desc[8]; /* zeros */ /* Set this if you want the 3D engine to wait until CP DMA is done. * It should be set on the last CP DMA packet. */ @@ -170,7 +170,7 @@ static void si_emit_shader_pointer(struct r600_context *rctx, static void si_emit_descriptors(struct r600_context *rctx, struct si_descriptors *desc, - const uint32_t **descriptors) + uint32_t **descriptors) { struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; uint64_t va_base; @@ -325,6 +325,135 @@ void si_set_sampler_view(struct r600_context *rctx, unsigned shader, si_update_descriptors(&views->desc); } +/* BUFFER RESOURCES */ + +static void si_emit_buffer_resources(struct r600_context *rctx, struct r600_atom *atom) +{ + struct si_buffer_resources *buffers = (struct si_buffer_resources*)atom; + + si_emit_descriptors(rctx, &buffers->desc, buffers->desc_data); +} + +static void si_init_buffer_resources(struct r600_context *rctx, + struct si_buffer_resources *buffers, + unsigned num_buffers, unsigned shader, + unsigned shader_userdata_index, + enum radeon_bo_usage shader_usage) +{ + int i; + + buffers->num_buffers = num_buffers; + buffers->shader_usage = shader_usage; + buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*)); + buffers->desc_storage = CALLOC(num_buffers, sizeof(uint32_t) * 4); + + /* si_emit_descriptors only accepts an array of arrays. + * This adds such an array. 
*/ + buffers->desc_data = CALLOC(num_buffers, sizeof(uint32_t*)); + for (i = 0; i < num_buffers; i++) { + buffers->desc_data[i] = &buffers->desc_storage[i*4]; + } + + si_init_descriptors(rctx, &buffers->desc, + si_get_shader_user_data_base(shader) + + shader_userdata_index*4, 4, num_buffers, + si_emit_buffer_resources); +} + +static void si_release_buffer_resources(struct si_buffer_resources *buffers) +{ + int i; + + for (i = 0; i < buffers->num_buffers; i++) { /* buffers->buffers is a CALLOC'ed pointer, not an array: Elements() on it would yield 1 and leave slots 1..num_buffers-1 referenced */ + pipe_resource_reference(&buffers->buffers[i], NULL); + } + + FREE(buffers->buffers); + FREE(buffers->desc_storage); + FREE(buffers->desc_data); + si_release_descriptors(&buffers->desc); +} + +static void si_buffer_resources_begin_new_cs(struct r600_context *rctx, + struct si_buffer_resources *buffers) +{ + unsigned mask = buffers->desc.enabled_mask; + + /* Add relocations to the CS. */ + while (mask) { + int i = u_bit_scan(&mask); + + r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, + (struct r600_resource*)buffers->buffers[i], + buffers->shader_usage); + } + + r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, + buffers->desc.buffer, RADEON_USAGE_READWRITE); + + si_emit_shader_pointer(rctx, &buffers->desc); +} + +/* CONSTANT BUFFERS */ + +static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint slot, + struct pipe_constant_buffer *input) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + struct si_buffer_resources *buffers = &rctx->const_buffers[shader]; + + if (shader >= SI_NUM_SHADERS) + return; + + rctx->b.flags |= R600_CONTEXT_INV_CONST_CACHE; + + assert(slot < buffers->num_buffers); + pipe_resource_reference(&buffers->buffers[slot], NULL); + + if (input && (input->buffer || input->user_buffer)) { + struct pipe_resource *buffer = NULL; + uint64_t va; + + /* Upload the user buffer if needed.
*/ + if (input->user_buffer) { + unsigned buffer_offset; + + r600_upload_const_buffer(rctx, + (struct r600_resource**)&buffer, input->user_buffer, + input->buffer_size, &buffer_offset); + va = r600_resource_va(ctx->screen, buffer) + buffer_offset; + } else { + pipe_resource_reference(&buffer, input->buffer); + va = r600_resource_va(ctx->screen, buffer) + input->buffer_offset; + } + + /* Set the descriptor. */ + uint32_t *desc = buffers->desc_data[slot]; + desc[0] = va; + desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | + S_008F04_STRIDE(0); + desc[2] = input->buffer_size; + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); + + buffers->buffers[slot] = buffer; + r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, + (struct r600_resource*)buffer, buffers->shader_usage); + buffers->desc.enabled_mask |= 1 << slot; + } else { + /* Clear the descriptor. */ + memset(buffers->desc_data[slot], 0, sizeof(uint32_t) * 4); + buffers->desc.enabled_mask &= ~(1 << slot); + } + + buffers->desc.dirty_mask |= 1 << slot; + si_update_descriptors(&buffers->desc); +} + /* INIT/DEINIT */ void si_init_all_descriptors(struct r600_context *rctx) @@ -332,10 +461,18 @@ void si_init_all_descriptors(struct r600_context *rctx) int i; for (i = 0; i < SI_NUM_SHADERS; i++) { + si_init_buffer_resources(rctx, &rctx->const_buffers[i], + NUM_CONST_BUFFERS, i, SI_SGPR_CONST, + RADEON_USAGE_READ); + si_init_sampler_views(rctx, &rctx->samplers[i].views, i); + rctx->atoms.const_buffers[i] = &rctx->const_buffers[i].desc.atom; rctx->atoms.sampler_views[i] = &rctx->samplers[i].views.desc.atom; } + + /* Set pipe_context functions. 
*/ + rctx->b.b.set_constant_buffer = si_set_constant_buffer; } void si_release_all_descriptors(struct r600_context *rctx) @@ -343,6 +480,7 @@ void si_release_all_descriptors(struct r600_context *rctx) int i; for (i = 0; i < SI_NUM_SHADERS; i++) { + si_release_buffer_resources(&rctx->const_buffers[i]); si_release_sampler_views(&rctx->samplers[i].views); } } @@ -352,6 +490,7 @@ void si_all_descriptors_begin_new_cs(struct r600_context *rctx) int i; for (i = 0; i < SI_NUM_SHADERS; i++) { + si_buffer_resources_begin_new_cs(rctx, &rctx->const_buffers[i]); si_sampler_views_begin_new_cs(rctx, &rctx->samplers[i].views); } } diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index b4370dad1f8..5ac55f22a5b 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3016,46 +3016,6 @@ static void si_delete_sampler_state(struct pipe_context *ctx, void *state) free(state); } -/* - * Constants - */ -static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, - struct pipe_constant_buffer *input) -{ - struct r600_context *rctx = (struct r600_context *)ctx; - struct r600_constbuf_state *state = &rctx->constbuf_state[shader]; - struct pipe_constant_buffer *cb; - const uint8_t *ptr; - - /* Note that the state tracker can unbind constant buffers by - * passing NULL here. - */ - if (unlikely(!input || (!input->buffer && !input->user_buffer))) { - state->enabled_mask &= ~(1 << index); - state->dirty_mask &= ~(1 << index); - pipe_resource_reference(&state->cb[index].buffer, NULL); - return; - } - - cb = &state->cb[index]; - cb->buffer_size = input->buffer_size; - - ptr = input->user_buffer; - - if (ptr) { - r600_upload_const_buffer(rctx, - (struct r600_resource**)&cb->buffer, ptr, - cb->buffer_size, &cb->buffer_offset); - } else { - /* Setup the hw buffer. 
*/ - cb->buffer_offset = input->buffer_offset; - pipe_resource_reference(&cb->buffer, input->buffer); - } - - state->enabled_mask |= 1 << index; - state->dirty_mask |= 1 << index; -} - /* * Vertex elements & buffers */ @@ -3241,8 +3201,6 @@ void si_init_state_functions(struct r600_context *rctx) rctx->b.b.set_sample_mask = si_set_sample_mask; - rctx->b.b.set_constant_buffer = si_set_constant_buffer; - rctx->b.b.create_vertex_elements_state = si_create_vertex_elements; rctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements; rctx->b.b.delete_vertex_elements_state = si_delete_vertex_element; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 20ae4332c4c..82fac4a4c52 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -92,11 +92,9 @@ union si_state { struct si_pm4_state *vs; struct si_pm4_state *vs_sampler_views; struct si_pm4_state *vs_sampler; - struct si_pm4_state *vs_const; struct si_pm4_state *ps; struct si_pm4_state *ps_sampler_views; struct si_pm4_state *ps_sampler; - struct si_pm4_state *ps_const; struct si_pm4_state *spi; struct si_pm4_state *vertex_buffers; struct si_pm4_state *texture_barrier; @@ -114,6 +112,8 @@ union si_state { #define FMASK_TEX_OFFSET NUM_TEX_UNITS #define NUM_SAMPLER_VIEWS (FMASK_TEX_OFFSET+NUM_TEX_UNITS) +#define NUM_CONST_BUFFERS 2 + /* This represents resource descriptors in memory, such as buffer resources, * image resources, and sampler states. 
*/ @@ -149,7 +149,16 @@ struct si_descriptors { struct si_sampler_views { struct si_descriptors desc; struct pipe_sampler_view *views[NUM_SAMPLER_VIEWS]; - const uint32_t *desc_data[NUM_SAMPLER_VIEWS]; + uint32_t *desc_data[NUM_SAMPLER_VIEWS]; +}; + +struct si_buffer_resources { + struct si_descriptors desc; + unsigned num_buffers; + enum radeon_bo_usage shader_usage; /* READ, WRITE, or READWRITE */ + struct pipe_resource **buffers; /* this has num_buffers elements */ + uint32_t *desc_storage; /* this has num_buffers*4 elements */ + uint32_t **desc_data; /* an array of pointers pointing to desc_storage */ }; #define si_pm4_block_idx(member) \ diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 1f740ba6f93..5c693ade88d 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -453,78 +453,6 @@ static void si_update_derived_state(struct r600_context *rctx) } } -static void si_constant_buffer_update(struct r600_context *rctx) -{ - struct pipe_context *ctx = &rctx->b.b; - struct si_pm4_state *pm4; - unsigned shader, i; - uint64_t va; - - if (!rctx->constbuf_state[PIPE_SHADER_VERTEX].dirty_mask && - !rctx->constbuf_state[PIPE_SHADER_FRAGMENT].dirty_mask) - return; - - for (shader = PIPE_SHADER_VERTEX ; shader <= PIPE_SHADER_FRAGMENT; shader++) { - struct r600_constbuf_state *state = &rctx->constbuf_state[shader]; - - pm4 = CALLOC_STRUCT(si_pm4_state); - if (!pm4) - continue; - - si_pm4_inval_shader_cache(pm4); - si_pm4_sh_data_begin(pm4); - - for (i = 0; i < 2; i++) { - if (state->enabled_mask & (1 << i)) { - struct pipe_constant_buffer *cb = &state->cb[i]; - struct r600_resource *rbuffer = r600_resource(cb->buffer); - - va = r600_resource_va(ctx->screen, (void*)rbuffer); - va += cb->buffer_offset; - - si_pm4_add_bo(pm4, rbuffer, RADEON_USAGE_READ); - - /* Fill in a T# buffer resource description */ - si_pm4_sh_data_add(pm4, va); - si_pm4_sh_data_add(pm4, 
(S_008F04_BASE_ADDRESS_HI(va >> 32) | - S_008F04_STRIDE(0))); - si_pm4_sh_data_add(pm4, cb->buffer_size); - si_pm4_sh_data_add(pm4, S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | - S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | - S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32)); - } else { - /* Fill in an empty T# buffer resource description */ - si_pm4_sh_data_add(pm4, 0); - si_pm4_sh_data_add(pm4, 0); - si_pm4_sh_data_add(pm4, 0); - si_pm4_sh_data_add(pm4, 0); - } - } - - switch (shader) { - case PIPE_SHADER_VERTEX: - si_pm4_sh_data_end(pm4, R_00B130_SPI_SHADER_USER_DATA_VS_0, SI_SGPR_CONST); - si_pm4_set_state(rctx, vs_const, pm4); - break; - - case PIPE_SHADER_FRAGMENT: - si_pm4_sh_data_end(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0, SI_SGPR_CONST); - si_pm4_set_state(rctx, ps_const, pm4); - break; - - default: - R600_ERR("unsupported %d\n", shader); - FREE(pm4); - return; - } - - state->dirty_mask = 0; - } -} - static void si_vertex_buffer_update(struct r600_context *rctx) { struct pipe_context *ctx = &rctx->b.b; @@ -653,7 +581,6 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) return; si_update_derived_state(rctx); - si_constant_buffer_update(rctx); si_vertex_buffer_update(rctx); if (info->indexed) { @@ -678,7 +605,14 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) si_state_draw(rctx, info, &ib); + /* Cache flushing via CP_COHER_CNTL. */ cp_coher_cntl = si_pm4_sync_flags(rctx); + + if (rctx->b.flags & R600_CONTEXT_INV_CONST_CACHE) { + cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1) | + S_0085F0_SH_KCACHE_ACTION_ENA(1); + } + if (cp_coher_cntl) { struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx); -- 2.30.2