From 5df24c3fa627243c259f5266359098463e41d172 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 13 May 2017 14:01:27 +0200 Subject: [PATCH] radeonsi: merge constant and shader buffers descriptor lists into one MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Constant buffers: slot[16], .. slot[31] (ascending) Shader buffers: slot[15], .. slot[0] (descending) The idea is that if we have 4 constant buffers and 2 shader buffers, we only have to upload 6 slots. That optimization is left for a later commit. Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_debug.c | 44 ++++-- src/gallium/drivers/radeonsi/si_descriptors.c | 144 +++++++++--------- src/gallium/drivers/radeonsi/si_pipe.h | 3 +- src/gallium/drivers/radeonsi/si_shader.c | 32 ++-- src/gallium/drivers/radeonsi/si_shader.h | 20 +-- .../drivers/radeonsi/si_shader_internal.h | 3 +- .../drivers/radeonsi/si_shader_tgsi_mem.c | 13 +- src/gallium/drivers/radeonsi/si_state.h | 25 ++- 8 files changed, 152 insertions(+), 132 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index d1159ad5e27..25c3882a115 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -380,23 +380,24 @@ static void si_dump_framebuffer(struct si_context *sctx, FILE *f) } } +typedef unsigned (*slot_remap_func)(unsigned); + static void si_dump_descriptor_list(struct si_descriptors *desc, const char *shader_name, const char *elem_name, unsigned num_elements, + slot_remap_func slot_remap, FILE *f) { unsigned i, j; - uint32_t *cpu_list = desc->list; - uint32_t *gpu_list = desc->gpu_list; - const char *list_note = "GPU list"; - - if (!gpu_list) { - gpu_list = cpu_list; - list_note = "CPU list"; - } for (i = 0; i < num_elements; i++) { + unsigned dw_offset = slot_remap(i) * desc->element_dw_size; + uint32_t *gpu_ptr = desc->gpu_list ? desc->gpu_list : desc->list; + const char *list_note = desc->gpu_list ? "GPU list" : "CPU list"; + uint32_t *cpu_list = desc->list + dw_offset; + uint32_t *gpu_list = gpu_ptr + dw_offset; + fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n", shader_name, elem_name, i, list_note); @@ -444,11 +445,14 @@ static void si_dump_descriptor_list(struct si_descriptors *desc, } fprintf(f, "\n"); - gpu_list += desc->element_dw_size; - cpu_list += desc->element_dw_size; } } +static unsigned si_identity(unsigned slot) +{ + return slot; +} + static void si_dump_descriptors(struct si_context *sctx, enum pipe_shader_type processor, const struct tgsi_shader_info *info, FILE *f) @@ -464,9 +468,16 @@ static void si_dump_descriptors(struct si_context *sctx, " - Sampler", " - Image", }; + static const slot_remap_func remap_func[] = { + si_get_constbuf_slot, + si_get_shaderbuf_slot, + si_identity, + si_identity, + }; unsigned enabled_slots[] = { - sctx->const_buffers[processor].enabled_mask, - sctx->shader_buffers[processor].enabled_mask, + sctx->const_and_shader_buffers[processor].enabled_mask >> SI_NUM_SHADER_BUFFERS, + util_bitreverse(sctx->const_and_shader_buffers[processor].enabled_mask & + u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS)), sctx->samplers[processor].views.enabled_mask, sctx->images[processor].enabled_mask, }; @@ -481,12 +492,14 @@ static void si_dump_descriptors(struct si_context *sctx, assert(info); /* only CS may not have an info struct */ si_dump_descriptor_list(&sctx->vertex_buffers, shader_name[processor], - " - Vertex buffer", info->num_inputs, f); + " - Vertex buffer", info->num_inputs, + si_identity, f); } for (unsigned i = 0; i < SI_NUM_SHADER_DESCS; ++i, ++descs) si_dump_descriptor_list(descs, shader_name[processor], elem_name[i], - util_last_bit(enabled_slots[i] | required_slots[i]), f); + util_last_bit(enabled_slots[i] | required_slots[i]), + remap_func[i], f); } static void si_dump_gfx_descriptors(struct si_context *sctx, @@ -805,7 +818,8 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, } si_dump_descriptor_list(&sctx->descriptors[SI_DESCS_RW_BUFFERS], - "", "RW buffers", SI_NUM_RW_BUFFERS, f); + "", "RW buffers", SI_NUM_RW_BUFFERS, + si_identity, f); si_dump_gfx_descriptors(sctx, &sctx->vs_shader, f); si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, f); si_dump_gfx_descriptors(sctx, &sctx->tes_shader, f); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index c92a6575ede..2e3a9c5f9e2 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -936,11 +936,15 @@ static void si_init_buffer_resources(struct si_buffer_resources *buffers, unsigned num_buffers, unsigned shader_userdata_index, enum radeon_bo_usage shader_usage, + enum radeon_bo_usage shader_usage_constbuf, enum radeon_bo_priority priority, + enum radeon_bo_priority priority_constbuf, unsigned *ce_offset) { buffers->shader_usage = shader_usage; + buffers->shader_usage_constbuf = shader_usage_constbuf; buffers->priority = priority; + buffers->priority_constbuf = priority_constbuf; buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*)); si_init_descriptors(descs, shader_userdata_index, 4, @@ -969,8 +973,11 @@ static void si_buffer_resources_begin_new_cs(struct si_context *sctx, int i = u_bit_scan(&mask); radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, - (struct r600_resource*)buffers->buffers[i], - buffers->shader_usage, buffers->priority); + r600_resource(buffers->buffers[i]), + i < SI_NUM_SHADER_BUFFERS ? buffers->shader_usage : + buffers->shader_usage_constbuf, + i < SI_NUM_SHADER_BUFFERS ? buffers->priority : + buffers->priority_constbuf); } } @@ -1119,16 +1126,16 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) /* CONSTANT BUFFERS */ static unsigned -si_const_buffer_descriptors_idx(unsigned shader) +si_const_and_shader_buffer_descriptors_idx(unsigned shader) { return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS + - SI_SHADER_DESCS_CONST_BUFFERS; + SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS; } static struct si_descriptors * -si_const_buffer_descriptors(struct si_context *sctx, unsigned shader) +si_const_and_shader_buffer_descriptors(struct si_context *sctx, unsigned shader) { - return &sctx->descriptors[si_const_buffer_descriptors_idx(shader)]; + return &sctx->descriptors[si_const_and_shader_buffer_descriptors_idx(shader)]; } void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer, @@ -1199,8 +1206,8 @@ static void si_set_constant_buffer(struct si_context *sctx, buffers->buffers[slot] = buffer; radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, (struct r600_resource*)buffer, - buffers->shader_usage, - buffers->priority, true); + buffers->shader_usage_constbuf, + buffers->priority_constbuf, true); buffers->enabled_mask |= 1u << slot; } else { /* Clear the descriptor. */ @@ -1228,8 +1235,9 @@ static void si_pipe_set_constant_buffer(struct pipe_context *ctx, if (shader >= SI_NUM_SHADERS) return; - si_set_constant_buffer(sctx, &sctx->const_buffers[shader], - si_const_buffer_descriptors_idx(shader), + slot = si_get_constbuf_slot(slot); + si_set_constant_buffer(sctx, &sctx->const_and_shader_buffers[shader], + si_const_and_shader_buffer_descriptors_idx(shader), slot, input); } @@ -1238,35 +1246,22 @@ void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader, { cbuf->user_buffer = NULL; si_get_buffer_from_descriptors( - &sctx->const_buffers[shader], - si_const_buffer_descriptors(sctx, shader), - slot, &cbuf->buffer, &cbuf->buffer_offset, &cbuf->buffer_size); + &sctx->const_and_shader_buffers[shader], + si_const_and_shader_buffer_descriptors(sctx, shader), + si_get_constbuf_slot(slot), + &cbuf->buffer, &cbuf->buffer_offset, &cbuf->buffer_size); } /* SHADER BUFFERS */ -static unsigned -si_shader_buffer_descriptors_idx(enum pipe_shader_type shader) -{ - return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS + - SI_SHADER_DESCS_SHADER_BUFFERS; -} - -static struct si_descriptors * -si_shader_buffer_descriptors(struct si_context *sctx, - enum pipe_shader_type shader) -{ - return &sctx->descriptors[si_shader_buffer_descriptors_idx(shader)]; -} - static void si_set_shader_buffers(struct pipe_context *ctx, enum pipe_shader_type shader, unsigned start_slot, unsigned count, const struct pipe_shader_buffer *sbuffers) { struct si_context *sctx = (struct si_context *)ctx; - struct si_buffer_resources *buffers = &sctx->shader_buffers[shader]; - struct si_descriptors *descs = si_shader_buffer_descriptors(sctx, shader); + struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader]; + struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader); unsigned i; assert(start_slot + count <= SI_NUM_SHADER_BUFFERS); @@ -1274,7 +1269,7 @@ static void si_set_shader_buffers(struct pipe_context *ctx, for (i = 0; i < count; ++i) { const struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL; struct r600_resource *buf; - unsigned slot = start_slot + i; + unsigned slot = si_get_shaderbuf_slot(start_slot + i); uint32_t *desc = descs->list + slot * 4; uint64_t va; @@ -1284,7 +1279,7 @@ static void si_set_shader_buffers(struct pipe_context *ctx, buffers->enabled_mask &= ~(1u << slot); descs->dirty_mask |= 1u << slot; sctx->descriptors_dirty |= - 1u << si_shader_buffer_descriptors_idx(shader); + 1u << si_const_and_shader_buffer_descriptors_idx(shader); continue; } @@ -1311,7 +1306,7 @@ static void si_set_shader_buffers(struct pipe_context *ctx, buffers->enabled_mask |= 1u << slot; descs->dirty_mask |= 1u << slot; sctx->descriptors_dirty |= - 1u << si_shader_buffer_descriptors_idx(shader); + 1u << si_const_and_shader_buffer_descriptors_idx(shader); util_range_add(&buf->valid_buffer_range, sbuffer->buffer_offset, sbuffer->buffer_offset + sbuffer->buffer_size); @@ -1323,12 +1318,13 @@ void si_get_shader_buffers(struct si_context *sctx, uint start_slot, uint count, struct pipe_shader_buffer *sbuf) { - struct si_buffer_resources *buffers = &sctx->shader_buffers[shader]; - struct si_descriptors *descs = si_shader_buffer_descriptors(sctx, shader); + struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader]; + struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader); for (unsigned i = 0; i < count; ++i) { si_get_buffer_from_descriptors( - buffers, descs, start_slot + i, + buffers, descs, + si_get_shaderbuf_slot(start_slot + i), &sbuf[i].buffer, &sbuf[i].buffer_offset, &sbuf[i].buffer_size); } @@ -1603,11 +1599,14 @@ void si_update_compressed_colortex_masks(struct si_context *sctx) static void si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers, unsigned descriptors_idx, + unsigned slot_mask, struct pipe_resource *buf, - uint64_t old_va) + uint64_t old_va, + enum radeon_bo_usage usage, + enum radeon_bo_priority priority) { struct si_descriptors *descs = &sctx->descriptors[descriptors_idx]; - unsigned mask = buffers->enabled_mask; + unsigned mask = buffers->enabled_mask & slot_mask; while (mask) { unsigned i = u_bit_scan(&mask); @@ -1620,8 +1619,7 @@ static void si_reset_buffer_resources(struct si_context *sctx, radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, (struct r600_resource *)buf, - buffers->shader_usage, - buffers->priority, true); + usage, priority, true); } } } @@ -1690,16 +1688,22 @@ static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf /* Constant and shader buffers. */ if (rbuffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) { for (shader = 0; shader < SI_NUM_SHADERS; shader++) - si_reset_buffer_resources(sctx, &sctx->const_buffers[shader], - si_const_buffer_descriptors_idx(shader), - buf, old_va); + si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], + si_const_and_shader_buffer_descriptors_idx(shader), + u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS), + buf, old_va, + sctx->const_and_shader_buffers[shader].shader_usage_constbuf, + sctx->const_and_shader_buffers[shader].priority_constbuf); } if (rbuffer->bind_history & PIPE_BIND_SHADER_BUFFER) { for (shader = 0; shader < SI_NUM_SHADERS; shader++) - si_reset_buffer_resources(sctx, &sctx->shader_buffers[shader], - si_shader_buffer_descriptors_idx(shader), - buf, old_va); + si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader], + si_const_and_shader_buffer_descriptors_idx(shader), + u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS), + buf, old_va, + sctx->const_and_shader_buffers[shader].shader_usage, + sctx->const_and_shader_buffers[shader].priority); } if (rbuffer->bind_history & PIPE_BIND_SAMPLER_VIEW) { @@ -2000,8 +2004,8 @@ void si_init_all_descriptors(struct si_context *sctx) int i; unsigned ce_offset = 0; - STATIC_ASSERT(GFX9_SGPR_TCS_CONST_BUFFERS % 2 == 0); - STATIC_ASSERT(GFX9_SGPR_GS_CONST_BUFFERS % 2 == 0); + STATIC_ASSERT(GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS % 2 == 0); + STATIC_ASSERT(GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS % 2 == 0); for (i = 0; i < SI_NUM_SHADERS; i++) { bool gfx9_tcs = sctx->b.chip_class == GFX9 && @@ -2013,27 +2017,23 @@ void si_init_all_descriptors(struct si_context *sctx) */ bool big_ce = sctx->b.chip_class <= VI; bool images_use_ce = big_ce; - bool shaderbufs_use_ce = big_ce || - i == PIPE_SHADER_COMPUTE; + bool const_and_shaderbufs_use_ce = big_ce || + i == PIPE_SHADER_VERTEX || + i == PIPE_SHADER_FRAGMENT; bool samplers_use_ce = big_ce || i == PIPE_SHADER_FRAGMENT; - si_init_buffer_resources(&sctx->const_buffers[i], - si_const_buffer_descriptors(sctx, i), - SI_NUM_CONST_BUFFERS, - gfx9_tcs ? GFX9_SGPR_TCS_CONST_BUFFERS : - gfx9_gs ? GFX9_SGPR_GS_CONST_BUFFERS : - SI_SGPR_CONST_BUFFERS, - RADEON_USAGE_READ, RADEON_PRIO_CONST_BUFFER, - &ce_offset); - si_init_buffer_resources(&sctx->shader_buffers[i], - si_shader_buffer_descriptors(sctx, i), - SI_NUM_SHADER_BUFFERS, - gfx9_tcs ? GFX9_SGPR_TCS_SHADER_BUFFERS : - gfx9_gs ? GFX9_SGPR_GS_SHADER_BUFFERS : - SI_SGPR_SHADER_BUFFERS, - RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER, - shaderbufs_use_ce ? &ce_offset : NULL); + si_init_buffer_resources(&sctx->const_and_shader_buffers[i], + si_const_and_shader_buffer_descriptors(sctx, i), + SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS, + gfx9_tcs ? GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS : + gfx9_gs ? GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS : + SI_SGPR_CONST_AND_SHADER_BUFFERS, + RADEON_USAGE_READWRITE, + RADEON_USAGE_READ, + RADEON_PRIO_SHADER_RW_BUFFER, + RADEON_PRIO_CONST_BUFFER, + const_and_shaderbufs_use_ce ? &ce_offset : NULL); si_init_descriptors(si_sampler_descriptors(sctx, i), gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS : @@ -2055,7 +2055,10 @@ void si_init_all_descriptors(struct si_context *sctx) si_init_buffer_resources(&sctx->rw_buffers, &sctx->descriptors[SI_DESCS_RW_BUFFERS], SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS, - RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RINGS, + /* The second set of usage/priority is used by + * const buffers in RW buffer slots. */ + RADEON_USAGE_READWRITE, RADEON_USAGE_READ, + RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER, &ce_offset); si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS, 4, SI_NUM_VERTEX_BUFFERS, NULL, NULL); @@ -2148,10 +2151,8 @@ void si_release_all_descriptors(struct si_context *sctx) int i; for (i = 0; i < SI_NUM_SHADERS; i++) { - si_release_buffer_resources(&sctx->const_buffers[i], - si_const_buffer_descriptors(sctx, i)); - si_release_buffer_resources(&sctx->shader_buffers[i], - si_shader_buffer_descriptors(sctx, i)); + si_release_buffer_resources(&sctx->const_and_shader_buffers[i], + si_const_and_shader_buffer_descriptors(sctx, i)); si_release_sampler_views(&sctx->samplers[i].views); si_release_image_views(&sctx->images[i]); } @@ -2168,8 +2169,7 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx) int i; for (i = 0; i < SI_NUM_SHADERS; i++) { - si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]); - si_buffer_resources_begin_new_cs(sctx, &sctx->shader_buffers[i]); + si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[i]); si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views); si_image_views_begin_new_cs(sctx, &sctx->images[i]); } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 431d8a3a429..449a802f76b 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -295,8 +295,7 @@ struct si_context { unsigned shader_pointers_dirty; unsigned compressed_tex_shader_mask; struct si_buffer_resources rw_buffers; - struct si_buffer_resources const_buffers[SI_NUM_SHADERS]; - struct si_buffer_resources shader_buffers[SI_NUM_SHADERS]; + struct si_buffer_resources const_and_shader_buffers[SI_NUM_SHADERS]; struct si_textures_info samplers[SI_NUM_SHADERS]; struct si_images_info images[SI_NUM_SHADERS]; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index a49449b4ef5..8c5bcb9f576 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1726,10 +1726,10 @@ static void declare_compute_memory(struct si_shader_context *ctx, static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i) { LLVMValueRef list_ptr = LLVMGetParam(ctx->main_fn, - ctx->param_const_buffers); + ctx->param_const_and_shader_buffers); return ac_build_indexed_load_const(&ctx->ac, list_ptr, - LLVMConstInt(ctx->i32, i, 0)); + LLVMConstInt(ctx->i32, si_get_constbuf_slot(i), 0)); } static LLVMValueRef fetch_constant( @@ -1759,11 +1759,13 @@ static LLVMValueRef fetch_constant( idx = reg->Register.Index * 4 + swizzle; if (reg->Register.Dimension && reg->Dimension.Indirect) { - LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_buffers); + LLVMValueRef ptr = LLVMGetParam(ctx->main_fn, ctx->param_const_and_shader_buffers); LLVMValueRef index; index = si_get_bounded_indirect_index(ctx, ®->DimIndirect, reg->Dimension.Index, SI_NUM_CONST_BUFFERS); + index = LLVMBuildAdd(ctx->gallivm.builder, index, + LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), ""); bufp = ac_build_indexed_load_const(&ctx->ac, ptr, index); } else bufp = load_const_buffer_desc(ctx, buf); @@ -2796,13 +2798,11 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx) unsigned desc_param = ctx->param_tcs_factor_addr_base64k + 2; ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param, - 8 + GFX9_SGPR_TCS_CONST_BUFFERS); + 8 + GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS); ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1, 8 + GFX9_SGPR_TCS_SAMPLERS); ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 2, 8 + GFX9_SGPR_TCS_IMAGES); - ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 3, - 8 + GFX9_SGPR_TCS_SHADER_BUFFERS); unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR; ret = si_insert_input_ret_float(ctx, ret, @@ -2825,13 +2825,11 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx) unsigned desc_param = ctx->param_vs_state_bits + 1; ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param, - 8 + GFX9_SGPR_GS_CONST_BUFFERS); + 8 + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS); ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1, 8 + GFX9_SGPR_GS_SAMPLERS); ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 2, 8 + GFX9_SGPR_GS_IMAGES); - ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 3, - 8 + GFX9_SGPR_GS_SHADER_BUFFERS); unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR; for (unsigned i = 0; i < 5; i++) { @@ -4061,16 +4059,15 @@ static void declare_per_stage_desc_pointers(struct si_shader_context *ctx, unsigned *num_params, bool assign_params) { - params[(*num_params)++] = si_const_array(ctx->v4i32, SI_NUM_CONST_BUFFERS); + params[(*num_params)++] = si_const_array(ctx->v4i32, + SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS); params[(*num_params)++] = si_const_array(ctx->v8i32, SI_NUM_SAMPLERS); params[(*num_params)++] = si_const_array(ctx->v8i32, SI_NUM_IMAGES); - params[(*num_params)++] = si_const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS); if (assign_params) { - ctx->param_const_buffers = *num_params - 4; - ctx->param_samplers = *num_params - 3; - ctx->param_images = *num_params - 2; - ctx->param_shader_buffers = *num_params - 1; + ctx->param_const_and_shader_buffers = *num_params - 3; + ctx->param_samplers = *num_params - 2; + ctx->param_images = *num_params - 1; } } @@ -6670,7 +6667,6 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, params[num_params++] = ctx->i64; params[num_params++] = ctx->i64; params[num_params++] = ctx->i64; - params[num_params++] = ctx->i64; params[num_params++] = ctx->i32; params[num_params++] = ctx->i32; params[num_params++] = ctx->i32; @@ -6685,7 +6681,6 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, params[num_params++] = ctx->i64; params[num_params++] = ctx->i64; params[num_params++] = ctx->i64; - params[num_params++] = ctx->i64; params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32; params[num_params++] = ctx->i32; params[num_params++] = ctx->i32; @@ -7042,10 +7037,9 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx, /* Declare input SGPRs. */ params[ctx->param_rw_buffers = num_params++] = ctx->i64; - params[ctx->param_const_buffers = num_params++] = ctx->i64; + params[ctx->param_const_and_shader_buffers = num_params++] = ctx->i64; params[ctx->param_samplers = num_params++] = ctx->i64; params[ctx->param_images = num_params++] = ctx->i64; - params[ctx->param_shader_buffers = num_params++] = ctx->i64; assert(num_params == SI_PARAM_ALPHA_REF); params[SI_PARAM_ALPHA_REF] = ctx->f32; last_sgpr = SI_PARAM_ALPHA_REF; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 1627de3980b..08e809c56b7 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -157,14 +157,12 @@ enum { */ SI_SGPR_RW_BUFFERS, /* rings (& stream-out, VS only) */ SI_SGPR_RW_BUFFERS_HI, - SI_SGPR_CONST_BUFFERS, - SI_SGPR_CONST_BUFFERS_HI, + SI_SGPR_CONST_AND_SHADER_BUFFERS, + SI_SGPR_CONST_AND_SHADER_BUFFERS_HI, SI_SGPR_SAMPLERS, /* images & sampler states interleaved */ SI_SGPR_SAMPLERS_HI, SI_SGPR_IMAGES, SI_SGPR_IMAGES_HI, - SI_SGPR_SHADER_BUFFERS, - SI_SGPR_SHADER_BUFFERS_HI, SI_NUM_RESOURCE_SGPRS, /* all VS variants */ @@ -197,25 +195,21 @@ enum { GFX9_SGPR_TCS_OFFCHIP_ADDR_BASE64K, GFX9_SGPR_TCS_FACTOR_ADDR_BASE64K, GFX9_SGPR_unused_to_align_the_next_pointer, - GFX9_SGPR_TCS_CONST_BUFFERS, - GFX9_SGPR_TCS_CONST_BUFFERS_HI, + GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS, + GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS_HI, GFX9_SGPR_TCS_SAMPLERS, /* images & sampler states interleaved */ GFX9_SGPR_TCS_SAMPLERS_HI, GFX9_SGPR_TCS_IMAGES, GFX9_SGPR_TCS_IMAGES_HI, - GFX9_SGPR_TCS_SHADER_BUFFERS, - GFX9_SGPR_TCS_SHADER_BUFFERS_HI, GFX9_TCS_NUM_USER_SGPR, /* GFX9: Merged ES-GS (VS-GS or TES-GS). */ - GFX9_SGPR_GS_CONST_BUFFERS = SI_VS_NUM_USER_SGPR, - GFX9_SGPR_GS_CONST_BUFFERS_HI, + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS = SI_VS_NUM_USER_SGPR, + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS_HI, GFX9_SGPR_GS_SAMPLERS, GFX9_SGPR_GS_SAMPLERS_HI, GFX9_SGPR_GS_IMAGES, GFX9_SGPR_GS_IMAGES_HI, - GFX9_SGPR_GS_SHADER_BUFFERS, - GFX9_SGPR_GS_SHADER_BUFFERS_HI, GFX9_GS_NUM_USER_SGPR, /* GS limits */ @@ -229,7 +223,7 @@ enum { /* LLVM function parameter indices */ enum { - SI_NUM_RESOURCE_PARAMS = 5, + SI_NUM_RESOURCE_PARAMS = 4, /* PS only parameters */ SI_PARAM_ALPHA_REF = SI_NUM_RESOURCE_PARAMS, diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 69e6dfcf920..9fd027d4cbd 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -109,10 +109,9 @@ struct si_shader_context { /* Parameter indices for LLVMGetParam. */ int param_rw_buffers; - int param_const_buffers; + int param_const_and_shader_buffers; int param_samplers; int param_images; - int param_shader_buffers; /* Common inputs for merged shaders. */ int param_merged_wave_info; int param_merged_scratch_offset; diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index a47d5bbae3b..1e5842b818f 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -84,14 +84,19 @@ shader_buffer_fetch_rsrc(struct si_shader_context *ctx, { LLVMValueRef index; LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn, - ctx->param_shader_buffers); + ctx->param_const_and_shader_buffers); - if (!reg->Register.Indirect) - index = LLVMConstInt(ctx->i32, reg->Register.Index, 0); - else + if (!reg->Register.Indirect) { + index = LLVMConstInt(ctx->i32, + si_get_shaderbuf_slot(reg->Register.Index), 0); + } else { index = si_get_bounded_indirect_index(ctx, ®->Indirect, reg->Register.Index, SI_NUM_SHADER_BUFFERS); + index = LLVMBuildSub(ctx->gallivm.builder, + LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS - 1, 0), + index, ""); + } return ac_build_indexed_load_const(&ctx->ac, rsrc_ptr, index); } diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 629d614f7fc..90d09720968 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -194,11 +194,12 @@ enum { * 21 - compute const buffers * ... */ -#define SI_SHADER_DESCS_CONST_BUFFERS 0 -#define SI_SHADER_DESCS_SHADER_BUFFERS 1 -#define SI_SHADER_DESCS_SAMPLERS 2 -#define SI_SHADER_DESCS_IMAGES 3 -#define SI_NUM_SHADER_DESCS 4 +enum { + SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS, + SI_SHADER_DESCS_SAMPLERS, + SI_SHADER_DESCS_IMAGES, + SI_NUM_SHADER_DESCS, +}; #define SI_DESCS_RW_BUFFERS 0 #define SI_DESCS_FIRST_SHADER 1 @@ -251,7 +252,9 @@ struct si_sampler_views { struct si_buffer_resources { enum radeon_bo_usage shader_usage; /* READ, WRITE, or READWRITE */ + enum radeon_bo_usage shader_usage_constbuf; enum radeon_bo_priority priority; + enum radeon_bo_priority priority_constbuf; struct pipe_resource **buffers; /* this has num_buffers elements */ /* The i-th bit is set if that element is enabled (non-NULL resource). */ @@ -372,4 +375,16 @@ si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil) return rtex->surface.u.legacy.tiling_index[level]; } +static inline unsigned si_get_constbuf_slot(unsigned slot) +{ + /* Constant buffers are in slots [16..31], ascending */ + return SI_NUM_SHADER_BUFFERS + slot; +} + +static inline unsigned si_get_shaderbuf_slot(unsigned slot) +{ + /* shader buffers are in slots [15..0], descending */ + return SI_NUM_SHADER_BUFFERS - 1 - slot; +} + #endif -- 2.30.2