From f9caabe8f1bff86d19b53d9ecba5c72b238d9e23 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 11 Sep 2015 04:43:53 +0100 Subject: [PATCH] r600g: lower number of driver const buffers I'm going to want a driver constant buffer for tess to coordinate LDS storage, so before I go tackling that I decided to merge the clip/samplepos and texture info buffers into one. So I can steal the spare one. This creates a single constant buffer between the two, with clip/samplepos taking up a reserved 128 bytes at the start. Reviewed-by: Edward O'Callaghan Reviewed-by: Glenn Kennard Signed-off-by: Dave Airlie --- src/gallium/drivers/r600/r600_pipe.h | 28 ++-- src/gallium/drivers/r600/r600_shader.c | 21 +-- src/gallium/drivers/r600/r600_state_common.c | 152 ++++++++++++------- 3 files changed, 131 insertions(+), 70 deletions(-) diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 25df831339c..d0774de8573 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -63,13 +63,15 @@ #define R600_TRACE_CS_DWORDS 7 #define R600_MAX_USER_CONST_BUFFERS 13 -#define R600_MAX_DRIVER_CONST_BUFFERS 3 +#define R600_MAX_DRIVER_CONST_BUFFERS 2 #define R600_MAX_CONST_BUFFERS (R600_MAX_USER_CONST_BUFFERS + R600_MAX_DRIVER_CONST_BUFFERS) /* start driver buffers after user buffers */ -#define R600_UCP_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS) -#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1) -#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 2) +#define R600_BUFFER_INFO_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS) +#define R600_UCP_SIZE (4*4*8) +#define R600_BUFFER_INFO_OFFSET (R600_UCP_SIZE) + +#define R600_GS_RING_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS + 1) /* Currently R600_MAX_CONST_BUFFERS just fits on the hw, which has a limit * of 16 const buffers. * UCP/SAMPLE_POSITIONS are never accessed by same shader stage so they can use the same id. @@ -77,8 +79,6 @@ * In order to support d3d 11 mandated minimum of 15 user const buffers * we'd have to squash all use cases into one driver buffer. */ -#define R600_SAMPLE_POSITIONS_CONST_BUFFER (R600_MAX_USER_CONST_BUFFERS) - #define R600_MAX_CONST_BUFFER_SIZE (4096 * sizeof(float[4])) #ifdef PIPE_ARCH_BIG_ENDIAN @@ -356,11 +356,15 @@ struct r600_textures_info { struct r600_samplerview_state views; struct r600_sampler_states states; bool is_array_sampler[NUM_TEX_UNITS]; +}; - /* cube array txq workaround */ - uint32_t *txq_constants; - /* buffer related workarounds */ - uint32_t *buffer_constants; +struct r600_shader_driver_constants_info { + /* currently 128 bytes for UCP/samplepos + sampler buffer constants */ + uint32_t *constants; + uint32_t alloc_size; + bool vs_ucp_dirty; + bool texture_const_dirty; + bool ps_sample_pos_dirty; }; struct r600_constbuf_state @@ -472,6 +476,9 @@ struct r600_context { struct r600_gs_rings_state gs_rings; struct r600_constbuf_state constbuf_state[PIPE_SHADER_TYPES]; struct r600_textures_info samplers[PIPE_SHADER_TYPES]; + + struct r600_shader_driver_constants_info driver_consts[PIPE_SHADER_TYPES]; + /** Vertex buffers for fetch shaders */ struct r600_vertexbuf_state vertex_buffer_state; /** Vertex buffers for compute shaders */ @@ -498,6 +505,7 @@ struct r600_context { void *sb_context; struct r600_isa *isa; + float sample_positions[4 * 16]; }; static inline void r600_emit_command_buffer(struct radeon_winsys_cs *cs, diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index f2c9e169f74..93b1bf7d5b4 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -60,6 +60,7 @@ issued in the w slot as well. The compiler must issue the source argument to slots z, y, and x */ +#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16) static int r600_shader_from_tgsi(struct r600_context *rctx, struct r600_pipe_shader *pipeshader, union r600_shader_key key); @@ -947,7 +948,7 @@ static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_ memset(&vtx, 0, sizeof(struct r600_bytecode_vtx)); vtx.op = FETCH_OP_VFETCH; - vtx.buffer_id = R600_SAMPLE_POSITIONS_CONST_BUFFER; + vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER; vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET; if (sample_id == NULL) { vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w; @@ -2307,7 +2308,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx, alu.src[0].chan = j; alu.src[1].sel = 512 + i; - alu.src[1].kc_bank = R600_UCP_CONST_BUFFER; + alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; alu.src[1].chan = j; alu.dst.sel = clipdist_temp[oreg]; @@ -5499,7 +5500,8 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l alu.src[0].sel = vtx.dst_gpr; alu.src[0].chan = i; - alu.src[1].sel = 512 + (id * 2); + alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL; + alu.src[1].sel += (id * 2); alu.src[1].chan = i % 4; alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; @@ -5521,7 +5523,7 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l alu.src[0].sel = vtx.dst_gpr; alu.src[0].chan = 3; - alu.src[1].sel = 512 + (id * 2) + 1; + alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL + (id * 2) + 1; alu.src[1].chan = 0; alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; @@ -5542,14 +5544,14 @@ static int r600_do_buffer_txq(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_MOV; - + alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; if (ctx->bc->chip_class >= EVERGREEN) { /* channel 0 or 2 of each word */ - alu.src[0].sel = 512 + (id / 2); + alu.src[0].sel += (id / 2); alu.src[0].chan = (id % 2) * 2; } else { /* r600 we have them at channel 2 of the second dword */ - alu.src[0].sel = 512 + (id * 2) + 1; + alu.src[0].sel += (id * 2) + 1; alu.src[0].chan = 1; } alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; @@ -6207,13 +6209,14 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.op = ALU_OP1_MOV; + alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; if (ctx->bc->chip_class >= EVERGREEN) { /* channel 1 or 3 of each word */ - alu.src[0].sel = 512 + (id / 2); + alu.src[0].sel += (id / 2); alu.src[0].chan = ((id % 2) * 2) + 1; } else { /* r600 we have them at channel 2 of the second dword */ - alu.src[0].sel = 512 + (id * 2) + 1; + alu.src[0].sel += (id * 2) + 1; alu.src[0].chan = 2; } alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index ae1341187cb..21c89dc0b61 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -240,17 +240,10 @@ static void r600_set_clip_state(struct pipe_context *ctx, const struct pipe_clip_state *state) { struct r600_context *rctx = (struct r600_context *)ctx; - struct pipe_constant_buffer cb; rctx->clip_state.state = *state; r600_mark_atom_dirty(rctx, &rctx->clip_state.atom); - - cb.buffer = NULL; - cb.user_buffer = state->ucp; - cb.buffer_offset = 0; - cb.buffer_size = 4*4*8; - ctx->set_constant_buffer(ctx, PIPE_SHADER_VERTEX, R600_UCP_CONST_BUFFER, &cb); - pipe_resource_reference(&cb.buffer, NULL); + rctx->driver_consts[PIPE_SHADER_VERTEX].vs_ucp_dirty = true; } static void r600_set_stencil_ref(struct pipe_context *ctx, @@ -1053,6 +1046,74 @@ static void r600_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask r600_mark_atom_dirty(rctx, &rctx->sample_mask.atom); } +static void r600_update_driver_const_buffers(struct r600_context *rctx) +{ + int sh, size;; + void *ptr; + struct pipe_constant_buffer cb; + for (sh = 0; sh < PIPE_SHADER_TYPES; sh++) { + struct r600_shader_driver_constants_info *info = &rctx->driver_consts[sh]; + if (!info->vs_ucp_dirty && + !info->texture_const_dirty && + !info->ps_sample_pos_dirty) + continue; + + ptr = info->constants; + size = info->alloc_size; + if (info->vs_ucp_dirty) { + assert(sh == PIPE_SHADER_VERTEX); + if (!size) { + ptr = rctx->clip_state.state.ucp; + size = R600_UCP_SIZE; + } else { + memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE); + } + info->vs_ucp_dirty = false; + } + + if (info->ps_sample_pos_dirty) { + assert(sh == PIPE_SHADER_FRAGMENT); + if (!size) { + ptr = rctx->sample_positions; + size = R600_UCP_SIZE; + } else { + memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE); + } + info->ps_sample_pos_dirty = false; + } + + if (info->texture_const_dirty) { + assert (ptr); + assert (size); + if (sh == PIPE_SHADER_VERTEX) + memcpy(ptr, rctx->clip_state.state.ucp, R600_UCP_SIZE); + if (sh == PIPE_SHADER_FRAGMENT) + memcpy(ptr, rctx->sample_positions, R600_UCP_SIZE); + } + info->texture_const_dirty = false; + + cb.buffer = NULL; + cb.user_buffer = ptr; + cb.buffer_offset = 0; + cb.buffer_size = size; + rctx->b.b.set_constant_buffer(&rctx->b.b, sh, R600_BUFFER_INFO_CONST_BUFFER, &cb); + pipe_resource_reference(&cb.buffer, NULL); + } +} + +static void *r600_alloc_buf_consts(struct r600_context *rctx, int shader_type, + int array_size, uint32_t *base_offset) +{ + struct r600_shader_driver_constants_info *info = &rctx->driver_consts[shader_type]; + if (array_size + R600_UCP_SIZE > info->alloc_size) { + info->constants = realloc(info->constants, array_size + R600_UCP_SIZE); + info->alloc_size = array_size + R600_UCP_SIZE; + } + memset(info->constants + (R600_UCP_SIZE / 4), 0, array_size); + info->texture_const_dirty = true; + *base_offset = R600_UCP_SIZE; + return info->constants; +} /* * On r600/700 hw we don't have vertex fetch swizzle, though TBO * doesn't require full swizzles it does need masking and setting alpha @@ -1067,9 +1128,9 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty struct r600_textures_info *samplers = &rctx->samplers[shader_type]; int bits; uint32_t array_size; - struct pipe_constant_buffer cb; int i, j; - + uint32_t *constants; + uint32_t base_offset; if (!samplers->views.dirty_buffer_constants) return; @@ -1077,38 +1138,33 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty bits = util_last_bit(samplers->views.enabled_mask); array_size = bits * 8 * sizeof(uint32_t) * 4; - samplers->buffer_constants = realloc(samplers->buffer_constants, array_size); - memset(samplers->buffer_constants, 0, array_size); + + constants = r600_alloc_buf_consts(rctx, shader_type, array_size, &base_offset); + for (i = 0; i < bits; i++) { if (samplers->views.enabled_mask & (1 << i)) { - int offset = i * 8; + int offset = (base_offset / 4) + i * 8; const struct util_format_description *desc; desc = util_format_description(samplers->views.views[i]->base.format); for (j = 0; j < 4; j++) if (j < desc->nr_channels) - samplers->buffer_constants[offset+j] = 0xffffffff; + constants[offset+j] = 0xffffffff; else - samplers->buffer_constants[offset+j] = 0x0; + constants[offset+j] = 0x0; if (desc->nr_channels < 4) { if (desc->channel[0].pure_integer) - samplers->buffer_constants[offset+4] = 1; + constants[offset+4] = 1; else - samplers->buffer_constants[offset+4] = fui(1.0); + constants[offset+4] = fui(1.0); } else - samplers->buffer_constants[offset + 4] = 0; + constants[offset + 4] = 0; - samplers->buffer_constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format); - samplers->buffer_constants[offset + 6] = samplers->views.views[i]->base.texture->array_size / 6; + constants[offset + 5] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format); + constants[offset + 6] = samplers->views.views[i]->base.texture->array_size / 6; } } - cb.buffer = NULL; - cb.user_buffer = samplers->buffer_constants; - cb.buffer_offset = 0; - cb.buffer_size = array_size; - rctx->b.b.set_constant_buffer(&rctx->b.b, shader_type, R600_BUFFER_INFO_CONST_BUFFER, &cb); - pipe_resource_reference(&cb.buffer, NULL); } /* On evergreen we store two values @@ -1120,9 +1176,9 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type struct r600_textures_info *samplers = &rctx->samplers[shader_type]; int bits; uint32_t array_size; - struct pipe_constant_buffer cb; int i; - + uint32_t *constants; + uint32_t base_offset; if (!samplers->views.dirty_buffer_constants) return; @@ -1130,45 +1186,37 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type bits = util_last_bit(samplers->views.enabled_mask); array_size = bits * 2 * sizeof(uint32_t) * 4; - samplers->buffer_constants = realloc(samplers->buffer_constants, array_size); - memset(samplers->buffer_constants, 0, array_size); + + constants = r600_alloc_buf_consts(rctx, shader_type, array_size, + &base_offset); + for (i = 0; i < bits; i++) { if (samplers->views.enabled_mask & (1 << i)) { - uint32_t offset = i * 2; - samplers->buffer_constants[offset] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format); - samplers->buffer_constants[offset + 1] = samplers->views.views[i]->base.texture->array_size / 6; + uint32_t offset = (base_offset / 4) + i * 2; + constants[offset] = samplers->views.views[i]->base.texture->width0 / util_format_get_blocksize(samplers->views.views[i]->base.format); + constants[offset + 1] = samplers->views.views[i]->base.texture->array_size / 6; } } - - cb.buffer = NULL; - cb.user_buffer = samplers->buffer_constants; - cb.buffer_offset = 0; - cb.buffer_size = array_size; - rctx->b.b.set_constant_buffer(&rctx->b.b, shader_type, R600_BUFFER_INFO_CONST_BUFFER, &cb); - pipe_resource_reference(&cb.buffer, NULL); } /* set sample xy locations as array of fragment shader constants */ void r600_set_sample_locations_constant_buffer(struct r600_context *rctx) { - struct pipe_constant_buffer constbuf = {0}; - float values[4*16] = {0.0f}; int i; struct pipe_context *ctx = &rctx->b.b; - assert(rctx->framebuffer.nr_samples <= Elements(values)/4); + assert(rctx->framebuffer.nr_samples < R600_UCP_SIZE); + assert(rctx->framebuffer.nr_samples <= Elements(rctx->sample_positions)/4); + + memset(rctx->sample_positions, 0, 4 * 4 * 16); for (i = 0; i < rctx->framebuffer.nr_samples; i++) { - ctx->get_sample_position(ctx, rctx->framebuffer.nr_samples, i, &values[4*i]); + ctx->get_sample_position(ctx, rctx->framebuffer.nr_samples, i, &rctx->sample_positions[4*i]); /* Also fill in center-zeroed positions used for interpolateAtSample */ - values[4*i + 2] = values[4*i + 0] - 0.5f; - values[4*i + 3] = values[4*i + 1] - 0.5f; + rctx->sample_positions[4*i + 2] = rctx->sample_positions[4*i + 0] - 0.5f; + rctx->sample_positions[4*i + 3] = rctx->sample_positions[4*i + 1] - 0.5f; } - constbuf.user_buffer = values; - constbuf.buffer_size = rctx->framebuffer.nr_samples * 4 * 4; - ctx->set_constant_buffer(ctx, PIPE_SHADER_FRAGMENT, - R600_SAMPLE_POSITIONS_CONST_BUFFER, &constbuf); - pipe_resource_reference(&constbuf.buffer, NULL); + rctx->driver_consts[PIPE_SHADER_FRAGMENT].ps_sample_pos_dirty = true; } static void update_shader_atom(struct pipe_context *ctx, @@ -1387,6 +1435,8 @@ static bool r600_update_derived_state(struct r600_context *rctx) } } + r600_update_driver_const_buffers(rctx); + if (rctx->b.chip_class < EVERGREEN && rctx->ps_shader && rctx->vs_shader) { if (!r600_adjust_gprs(rctx)) { /* discard rendering */ -- 2.30.2