From 523b6c87048ddc5b49be4ca985bf91d8585aef47 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Wed, 3 Jan 2018 02:09:01 +0100 Subject: [PATCH] r600: increase number of UBOs to 15 With the exception of the default tess levels only ever accessed by the default tcs shader, the LDS_INFO const buffer was only accessed by vtx instructions, and not through kcache. No idea why really, but use this to our advantage by not using a constant buffer slot for it. This just requires us to throw the default tess levels into the "normal" driver const buffer instead. Alternatively, we could access those constants via vtx instructions too, but then we couldn't use an ordinary ureg prog accessing them as constants and would have to generate that directly when compiling the default tcs shader. (Another alternative would be to put all lds info into the ordinary driver const buffer, albeit we'd maybe need to increase the fixed size as it can't fit alongside the ucp since vs needs access to the lds info too.) 
Tested-by: Konstantin Kharlamov Dave Airlie --- src/gallium/drivers/r600/evergreen_state.c | 15 +++------- src/gallium/drivers/r600/r600_pipe.h | 13 +++++--- src/gallium/drivers/r600/r600_state_common.c | 31 +++++++++++++++----- 3 files changed, 37 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index f645791a2cb..4cc48dfa119 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -2168,8 +2168,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx, va = rbuffer->gpu_address + cb->buffer_offset; - if (!gs_ring_buffer) { - assert(buffer_index < R600_MAX_HW_CONST_BUFFERS); + if (buffer_index < R600_MAX_HW_CONST_BUFFERS) { radeon_set_context_reg_flag(cs, reg_alu_constbuf_size + buffer_index * 4, DIV_ROUND_UP(cb->buffer_size, 256), pkt_flags); radeon_set_context_reg_flag(cs, reg_alu_const_cache + buffer_index * 4, va >> 8, @@ -3880,7 +3879,7 @@ static void evergreen_set_tess_state(struct pipe_context *ctx, memcpy(rctx->tess_state, default_outer_level, sizeof(float) * 4); memcpy(rctx->tess_state+4, default_inner_level, sizeof(float) * 2); - rctx->tess_state_dirty = true; + rctx->driver_consts[PIPE_SHADER_TESS_CTRL].tcs_default_levels_dirty = true; } static void evergreen_setup_immed_buffer(struct r600_context *rctx, @@ -4344,7 +4343,7 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe unsigned input_vertex_size, output_vertex_size; unsigned input_patch_size, pervertex_output_patch_size, output_patch_size; unsigned output_patch0_offset, perpatch_output_offset, lds_size; - uint32_t values[16]; + uint32_t values[8]; unsigned num_waves; unsigned num_pipes = rctx->screen->b.info.r600_max_quad_pipes; unsigned wave_divisor = (16 * num_pipes); @@ -4364,7 +4363,6 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe if (rctx->lds_alloc != 0 && rctx->last_ls == ls 
&& - !rctx->tess_state_dirty && rctx->last_num_tcs_input_cp == num_tcs_input_cp && rctx->last_tcs == tcs) return; @@ -4411,17 +4409,12 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe rctx->lds_alloc = (lds_size | (num_waves << 14)); - memcpy(&values[8], rctx->tess_state, 6 * sizeof(float)); - values[14] = 0; - values[15] = 0; - - rctx->tess_state_dirty = false; rctx->last_ls = ls; rctx->last_tcs = tcs; rctx->last_num_tcs_input_cp = num_tcs_input_cp; constbuf.user_buffer = values; - constbuf.buffer_size = 16 * 4; + constbuf.buffer_size = 8 * 4; rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX, R600_LDS_INFO_CONST_BUFFER, &constbuf); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index cb84bc1998a..112b5cbb83e 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -71,7 +71,7 @@ #define EG_MAX_ATOMIC_BUFFERS 8 -#define R600_MAX_USER_CONST_BUFFERS 14 +#define R600_MAX_USER_CONST_BUFFERS 15 #define R600_MAX_DRIVER_CONST_BUFFERS 3 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS) #define R600_MAX_HW_CONST_BUFFERS 16 @@ -80,12 +80,17 @@ #define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS) #define R600_UCP_SIZE (4*4*8) #define R600_CS_BLOCK_GRID_SIZE (8 * 4) +#define R600_TCS_DEFAULT_LEVELS_SIZE (6 * 4) #define R600_BUFFER_INFO_OFFSET (R600_UCP_SIZE) +/* + * We only access this buffer through vtx clauses hence it's fine to exist + * at index beyond 15. + */ #define R600_LDS_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1) /* * Note GS doesn't use a constant buffer binding, just a resource index, - * so it's fine to have it exist at index 16. I.e. it's not actually + * so it's fine to have it exist at index beyond 15. I.e. it's not actually * a const buffer, just a buffer resource. 
*/ #define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2) @@ -396,10 +401,11 @@ struct r600_shader_driver_constants_info { /* currently 128 bytes for UCP/samplepos + sampler buffer constants */ uint32_t *constants; uint32_t alloc_size; - bool vs_ucp_dirty; bool texture_const_dirty; + bool vs_ucp_dirty; bool ps_sample_pos_dirty; bool cs_block_grid_size_dirty; + bool tcs_default_levels_dirty; }; struct r600_constbuf_state @@ -580,7 +586,6 @@ struct r600_context { float sample_positions[4 * 16]; float tess_state[8]; uint32_t cs_block_grid_sizes[8]; /* 3 for grid + 1 pad, 3 for block + 1 pad*/ - bool tess_state_dirty; struct r600_pipe_shader_selector *last_ls; struct r600_pipe_shader_selector *last_tcs; unsigned last_num_tcs_input_cp; diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index b3b2fd09ef8..7f4d9f3e334 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -1230,6 +1230,7 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on if (!info->vs_ucp_dirty && !info->texture_const_dirty && !info->ps_sample_pos_dirty && + !info->tcs_default_levels_dirty && !info->cs_block_grid_size_dirty) continue; @@ -1246,7 +1247,7 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on info->vs_ucp_dirty = false; } - if (info->ps_sample_pos_dirty) { + else if (info->ps_sample_pos_dirty) { assert(sh == PIPE_SHADER_FRAGMENT); if (!size) { ptr = rctx->sample_positions; @@ -1257,7 +1258,7 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on info->ps_sample_pos_dirty = false; } - if (info->cs_block_grid_size_dirty) { + else if (info->cs_block_grid_size_dirty) { assert(sh == PIPE_SHADER_COMPUTE); if (!size) { ptr = rctx->cs_block_grid_sizes; @@ -1268,6 +1269,20 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on info->cs_block_grid_size_dirty = 
false; } + else if (info->tcs_default_levels_dirty) { + /* + * We'd only really need this for default tcs shader. + */ + assert(sh == PIPE_SHADER_TESS_CTRL); + if (!size) { + ptr = rctx->tess_state; + size = R600_TCS_DEFAULT_LEVELS_SIZE; + } else { + memcpy(ptr, rctx->tess_state, R600_TCS_DEFAULT_LEVELS_SIZE); + } + info->tcs_default_levels_dirty = false; + } + if (info->texture_const_dirty) { assert (ptr); assert (size); @@ -1277,6 +1292,8 @@ void r600_update_driver_const_buffers(struct r600_context *rctx, bool compute_on memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE); if (sh == PIPE_SHADER_COMPUTE) memcpy(ptr, rctx->cs_block_grid_sizes, R600_CS_BLOCK_GRID_SIZE); + if (sh == PIPE_SHADER_TESS_CTRL) + memcpy(ptr, rctx->tess_state, R600_TCS_DEFAULT_LEVELS_SIZE); } info->texture_const_dirty = false; @@ -1521,11 +1538,11 @@ static void r600_generate_fixed_func_tcs(struct r600_context *rctx) assert(!rctx->fixed_func_tcs_shader); - ureg_DECL_constant2D(ureg, 0, 3, R600_LDS_INFO_CONST_BUFFER); - const0 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 2), - R600_LDS_INFO_CONST_BUFFER); - const1 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 3), - R600_LDS_INFO_CONST_BUFFER); + ureg_DECL_constant2D(ureg, 0, 1, R600_BUFFER_INFO_CONST_BUFFER); + const0 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 0), + R600_BUFFER_INFO_CONST_BUFFER); + const1 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 1), + R600_BUFFER_INFO_CONST_BUFFER); tessouter = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSOUTER, 0); tessinner = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSINNER, 0); -- 2.30.2