From f07c15ef807fb50659bf7a648393991f582f6a7f Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 13 May 2017 17:16:27 +0200 Subject: [PATCH] radeonsi: merge sampler and image descriptor lists into one MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Sampler slots: slot[8], .. slot[39] (ascending) Image slots: slot[7], .. slot[0] (descending) Each image occupies 1/2 of each slot, so there are 16 images in total, therefore the layout is: slot[15], .. slot[0]. (in 1/2 slot increments) Updating image slot 2n+i (i <= 1) also dirties and re-uploads slot 2n+!i. Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_descriptors.c | 134 ++++++++---------- src/gallium/drivers/radeonsi/si_shader.c | 22 +-- src/gallium/drivers/radeonsi/si_shader.h | 20 +-- .../drivers/radeonsi/si_shader_internal.h | 3 +- .../drivers/radeonsi/si_shader_tgsi_mem.c | 15 +- src/gallium/drivers/radeonsi/si_state.h | 17 ++- 6 files changed, 99 insertions(+), 112 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 2e3a9c5f9e2..5703f59dbdb 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -99,17 +99,14 @@ static void si_init_descriptors(struct si_descriptors *desc, unsigned shader_userdata_index, unsigned element_dw_size, unsigned num_elements, - const uint32_t *null_descriptor, unsigned *ce_offset) { - int i; - assert(num_elements <= sizeof(desc->dirty_mask)*8); desc->list = CALLOC(num_elements, element_dw_size * 4); desc->element_dw_size = element_dw_size; desc->num_elements = num_elements; - desc->dirty_mask = num_elements == 32 ? 
~0u : (1u << num_elements) - 1; + desc->dirty_mask = u_bit_consecutive64(0, num_elements); desc->shader_userdata_offset = shader_userdata_index * 4; if (ce_offset) { @@ -119,14 +116,6 @@ static void si_init_descriptors(struct si_descriptors *desc, /* make sure that ce_offset stays 32 byte aligned */ *ce_offset += align(element_dw_size * num_elements * 4, 32); } - - /* Initialize the array to NULL descriptors if the element size is 8. */ - if (null_descriptor) { - assert(element_dw_size % 8 == 0); - for (i = 0; i < num_elements * element_dw_size / 8; i++) - memcpy(desc->list + i * 8, null_descriptor, - 8 * 4); - } } static void si_release_descriptors(struct si_descriptors *desc) @@ -219,8 +208,8 @@ static bool si_upload_descriptors(struct si_context *sctx, while(desc->dirty_mask) { int begin, count; - u_bit_scan_consecutive_range(&desc->dirty_mask, &begin, - &count); + u_bit_scan_consecutive_range64(&desc->dirty_mask, &begin, + &count); begin *= desc->element_dw_size; count *= desc->element_dw_size; @@ -273,16 +262,16 @@ si_descriptors_begin_new_cs(struct si_context *sctx, struct si_descriptors *desc /* SAMPLER VIEWS */ static unsigned -si_sampler_descriptors_idx(unsigned shader) +si_sampler_and_image_descriptors_idx(unsigned shader) { return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS + - SI_SHADER_DESCS_SAMPLERS; + SI_SHADER_DESCS_SAMPLERS_AND_IMAGES; } static struct si_descriptors * -si_sampler_descriptors(struct si_context *sctx, unsigned shader) +si_sampler_and_image_descriptors(struct si_context *sctx, unsigned shader) { - return &sctx->descriptors[si_sampler_descriptors_idx(shader)]; + return &sctx->descriptors[si_sampler_and_image_descriptors_idx(shader)]; } static void si_release_sampler_views(struct si_sampler_views *views) @@ -474,8 +463,9 @@ static void si_set_sampler_view(struct si_context *sctx, { struct si_sampler_views *views = &sctx->samplers[shader].views; struct si_sampler_view *rview = (struct si_sampler_view*)view; - struct 
si_descriptors *descs = si_sampler_descriptors(sctx, shader); - uint32_t *desc = descs->list + slot * 16; + struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader); + unsigned desc_slot = si_get_sampler_slot(slot); + uint32_t *desc = descs->list + desc_slot * 16; if (views->views[slot] == view && !disallow_early_out) return; @@ -549,8 +539,8 @@ static void si_set_sampler_view(struct si_context *sctx, views->enabled_mask &= ~(1u << slot); } - descs->dirty_mask |= 1u << slot; - sctx->descriptors_dirty |= 1u << si_sampler_descriptors_idx(shader); + descs->dirty_mask |= 1ull << desc_slot; + sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); } static bool is_compressed_colortex(struct r600_texture *rtex) @@ -656,19 +646,6 @@ si_samplers_update_compressed_colortex_mask(struct si_textures_info *samplers) /* IMAGE VIEWS */ -static unsigned -si_image_descriptors_idx(unsigned shader) -{ - return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS + - SI_SHADER_DESCS_IMAGES; -} - -static struct si_descriptors* -si_image_descriptors(struct si_context *sctx, unsigned shader) -{ - return &sctx->descriptors[si_image_descriptors_idx(shader)]; -} - static void si_release_image_views(struct si_images_info *images) { @@ -704,15 +681,17 @@ si_disable_shader_image(struct si_context *ctx, unsigned shader, unsigned slot) struct si_images_info *images = &ctx->images[shader]; if (images->enabled_mask & (1u << slot)) { - struct si_descriptors *descs = si_image_descriptors(ctx, shader); + struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader); + unsigned desc_slot = si_get_image_slot(slot); pipe_resource_reference(&images->views[slot].resource, NULL); images->compressed_colortex_mask &= ~(1 << slot); - memcpy(descs->list + slot*8, null_image_descriptor, 8*4); + memcpy(descs->list + desc_slot*8, null_image_descriptor, 8*4); images->enabled_mask &= ~(1u << slot); - descs->dirty_mask |= 1u << slot; - ctx->descriptors_dirty 
|= 1u << si_image_descriptors_idx(shader); + /* two 8-dword images share one 16-dword slot */ + descs->dirty_mask |= 1u << (desc_slot / 2); + ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); } } @@ -735,9 +714,10 @@ static void si_set_shader_image(struct si_context *ctx, { struct si_screen *screen = ctx->screen; struct si_images_info *images = &ctx->images[shader]; - struct si_descriptors *descs = si_image_descriptors(ctx, shader); + struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader); struct r600_resource *res; - uint32_t *desc = descs->list + slot * 8; + unsigned desc_slot = si_get_image_slot(slot); + uint32_t *desc = descs->list + desc_slot * 8; if (!view || !view->resource) { si_disable_shader_image(ctx, shader, slot); @@ -831,8 +811,9 @@ static void si_set_shader_image(struct si_context *ctx, } images->enabled_mask |= 1u << slot; - descs->dirty_mask |= 1u << slot; - ctx->descriptors_dirty |= 1u << si_image_descriptors_idx(shader); + /* two 8-dword images share one 16-dword slot */ + descs->dirty_mask |= 1u << (desc_slot / 2); + ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); /* Since this can flush, it must be done after enabled_mask is updated. 
*/ si_sampler_view_add_buffer(ctx, &res->b.b, @@ -895,7 +876,7 @@ static void si_bind_sampler_states(struct pipe_context *ctx, { struct si_context *sctx = (struct si_context *)ctx; struct si_textures_info *samplers = &sctx->samplers[shader]; - struct si_descriptors *desc = si_sampler_descriptors(sctx, shader); + struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, shader); struct si_sampler_state **sstates = (struct si_sampler_state**)states; int i; @@ -904,6 +885,7 @@ static void si_bind_sampler_states(struct pipe_context *ctx, for (i = 0; i < count; i++) { unsigned slot = start + i; + unsigned desc_slot = si_get_sampler_slot(slot); if (!sstates[i] || sstates[i] == samplers->views.sampler_states[slot]) @@ -923,9 +905,9 @@ static void si_bind_sampler_states(struct pipe_context *ctx, ((struct r600_texture*)samplers->views.views[slot]->texture)->fmask.size) continue; - memcpy(desc->list + slot * 16 + 12, sstates[i]->val, 4*4); - desc->dirty_mask |= 1u << slot; - sctx->descriptors_dirty |= 1u << si_sampler_descriptors_idx(shader); + memcpy(desc->list + desc_slot * 16 + 12, sstates[i]->val, 4*4); + desc->dirty_mask |= 1ull << desc_slot; + sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); } } @@ -948,7 +930,7 @@ static void si_init_buffer_resources(struct si_buffer_resources *buffers, buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*)); si_init_descriptors(descs, shader_userdata_index, 4, - num_buffers, NULL, ce_offset); + num_buffers, ce_offset); } static void si_release_buffer_resources(struct si_buffer_resources *buffers, @@ -1711,19 +1693,21 @@ static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf for (shader = 0; shader < SI_NUM_SHADERS; shader++) { struct si_sampler_views *views = &sctx->samplers[shader].views; struct si_descriptors *descs = - si_sampler_descriptors(sctx, shader); + si_sampler_and_image_descriptors(sctx, shader); unsigned mask = views->enabled_mask; while 
(mask) { unsigned i = u_bit_scan(&mask); if (views->views[i]->texture == buf) { + unsigned desc_slot = si_get_sampler_slot(i); + si_desc_reset_buffer_offset(ctx, descs->list + - i * 16 + 4, + desc_slot * 16 + 4, old_va, buf); - descs->dirty_mask |= 1u << i; + descs->dirty_mask |= 1ull << desc_slot; sctx->descriptors_dirty |= - 1u << si_sampler_descriptors_idx(shader); + 1u << si_sampler_and_image_descriptors_idx(shader); radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx, rbuffer, RADEON_USAGE_READ, @@ -1739,22 +1723,25 @@ static void si_rebind_buffer(struct pipe_context *ctx, struct pipe_resource *buf for (shader = 0; shader < SI_NUM_SHADERS; ++shader) { struct si_images_info *images = &sctx->images[shader]; struct si_descriptors *descs = - si_image_descriptors(sctx, shader); + si_sampler_and_image_descriptors(sctx, shader); unsigned mask = images->enabled_mask; while (mask) { unsigned i = u_bit_scan(&mask); if (images->views[i].resource == buf) { + unsigned desc_slot = si_get_image_slot(i); + if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE) si_mark_image_range_valid(&images->views[i]); si_desc_reset_buffer_offset( - ctx, descs->list + i * 8 + 4, + ctx, descs->list + desc_slot * 8 + 4, old_va, buf); - descs->dirty_mask |= 1u << i; + /* two 8-dword images share one 16-dword slot */ + descs->dirty_mask |= 1u << (desc_slot / 2); sctx->descriptors_dirty |= - 1u << si_image_descriptors_idx(shader); + 1u << si_sampler_and_image_descriptors_idx(shader); radeon_add_to_buffer_list_check_mem( &sctx->b, &sctx->b.gfx, rbuffer, @@ -2016,12 +2003,11 @@ void si_init_all_descriptors(struct si_context *sctx) * Rarely used descriptors don't use CE RAM. 
*/ bool big_ce = sctx->b.chip_class <= VI; - bool images_use_ce = big_ce; bool const_and_shaderbufs_use_ce = big_ce || i == PIPE_SHADER_VERTEX || i == PIPE_SHADER_FRAGMENT; - bool samplers_use_ce = big_ce || - i == PIPE_SHADER_FRAGMENT; + bool samplers_and_images_use_ce = big_ce || + i == PIPE_SHADER_FRAGMENT; si_init_buffer_resources(&sctx->const_and_shader_buffers[i], si_const_and_shader_buffer_descriptors(sctx, i), @@ -2035,21 +2021,19 @@ void si_init_all_descriptors(struct si_context *sctx) RADEON_PRIO_CONST_BUFFER, const_and_shaderbufs_use_ce ? &ce_offset : NULL); - si_init_descriptors(si_sampler_descriptors(sctx, i), - gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS : - gfx9_gs ? GFX9_SGPR_GS_SAMPLERS : - SI_SGPR_SAMPLERS, - 16, SI_NUM_SAMPLERS, - null_texture_descriptor, - samplers_use_ce ? &ce_offset : NULL); - - si_init_descriptors(si_image_descriptors(sctx, i), - gfx9_tcs ? GFX9_SGPR_TCS_IMAGES : - gfx9_gs ? GFX9_SGPR_GS_IMAGES : - SI_SGPR_IMAGES, - 8, SI_NUM_IMAGES, - null_image_descriptor, - images_use_ce ? &ce_offset : NULL); + struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, i); + si_init_descriptors(desc, + gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES : + gfx9_gs ? GFX9_SGPR_GS_SAMPLERS_AND_IMAGES : + SI_SGPR_SAMPLERS_AND_IMAGES, + 16, SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS, + samplers_and_images_use_ce ? 
&ce_offset : NULL); + + int j; + for (j = 0; j < SI_NUM_IMAGES; j++) + memcpy(desc->list + j * 8, null_image_descriptor, 8 * 4); + for (; j < SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2; j++) + memcpy(desc->list + j * 8, null_texture_descriptor, 8 * 4); } si_init_buffer_resources(&sctx->rw_buffers, @@ -2061,7 +2045,7 @@ void si_init_all_descriptors(struct si_context *sctx) RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER, &ce_offset); si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS, - 4, SI_NUM_VERTEX_BUFFERS, NULL, NULL); + 4, SI_NUM_VERTEX_BUFFERS, NULL); sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 8c5bcb9f576..f847e46e5dc 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2800,9 +2800,7 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx) ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param, 8 + GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS); ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1, - 8 + GFX9_SGPR_TCS_SAMPLERS); - ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 2, - 8 + GFX9_SGPR_TCS_IMAGES); + 8 + GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES); unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR; ret = si_insert_input_ret_float(ctx, ret, @@ -2827,9 +2825,7 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx) ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param, 8 + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS); ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1, - 8 + GFX9_SGPR_GS_SAMPLERS); - ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 2, - 8 + GFX9_SGPR_GS_IMAGES); + 8 + GFX9_SGPR_GS_SAMPLERS_AND_IMAGES); unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR; for (unsigned i = 0; i < 5; i++) { @@ -4061,13 +4057,12 @@ static void declare_per_stage_desc_pointers(struct si_shader_context *ctx, { params[(*num_params)++] 
= si_const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS); - params[(*num_params)++] = si_const_array(ctx->v8i32, SI_NUM_SAMPLERS); - params[(*num_params)++] = si_const_array(ctx->v8i32, SI_NUM_IMAGES); + params[(*num_params)++] = si_const_array(ctx->v8i32, + SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2); if (assign_params) { - ctx->param_const_and_shader_buffers = *num_params - 3; - ctx->param_samplers = *num_params - 2; - ctx->param_images = *num_params - 1; + ctx->param_const_and_shader_buffers = *num_params - 2; + ctx->param_samplers_and_images = *num_params - 1; } } @@ -6666,7 +6661,6 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, params[num_params++] = ctx->i64; params[num_params++] = ctx->i64; params[num_params++] = ctx->i64; - params[num_params++] = ctx->i64; params[num_params++] = ctx->i32; params[num_params++] = ctx->i32; params[num_params++] = ctx->i32; @@ -6680,7 +6674,6 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, params[num_params++] = ctx->i64; params[num_params++] = ctx->i64; params[num_params++] = ctx->i64; - params[num_params++] = ctx->i64; params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32; params[num_params++] = ctx->i32; params[num_params++] = ctx->i32; @@ -7038,8 +7031,7 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx, /* Declare input SGPRs. 
*/ params[ctx->param_rw_buffers = num_params++] = ctx->i64; params[ctx->param_const_and_shader_buffers = num_params++] = ctx->i64; - params[ctx->param_samplers = num_params++] = ctx->i64; - params[ctx->param_images = num_params++] = ctx->i64; + params[ctx->param_samplers_and_images = num_params++] = ctx->i64; assert(num_params == SI_PARAM_ALPHA_REF); params[SI_PARAM_ALPHA_REF] = ctx->f32; last_sgpr = SI_PARAM_ALPHA_REF; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 08e809c56b7..ffb7dc3b81d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -159,10 +159,8 @@ enum { SI_SGPR_RW_BUFFERS_HI, SI_SGPR_CONST_AND_SHADER_BUFFERS, SI_SGPR_CONST_AND_SHADER_BUFFERS_HI, - SI_SGPR_SAMPLERS, /* images & sampler states interleaved */ - SI_SGPR_SAMPLERS_HI, - SI_SGPR_IMAGES, - SI_SGPR_IMAGES_HI, + SI_SGPR_SAMPLERS_AND_IMAGES, + SI_SGPR_SAMPLERS_AND_IMAGES_HI, SI_NUM_RESOURCE_SGPRS, /* all VS variants */ @@ -197,19 +195,15 @@ enum { GFX9_SGPR_unused_to_align_the_next_pointer, GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS, GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS_HI, - GFX9_SGPR_TCS_SAMPLERS, /* images & sampler states interleaved */ - GFX9_SGPR_TCS_SAMPLERS_HI, - GFX9_SGPR_TCS_IMAGES, - GFX9_SGPR_TCS_IMAGES_HI, + GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES, + GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES_HI, GFX9_TCS_NUM_USER_SGPR, /* GFX9: Merged ES-GS (VS-GS or TES-GS). 
*/ GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS = SI_VS_NUM_USER_SGPR, GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS_HI, - GFX9_SGPR_GS_SAMPLERS, - GFX9_SGPR_GS_SAMPLERS_HI, - GFX9_SGPR_GS_IMAGES, - GFX9_SGPR_GS_IMAGES_HI, + GFX9_SGPR_GS_SAMPLERS_AND_IMAGES, + GFX9_SGPR_GS_SAMPLERS_AND_IMAGES_HI, GFX9_GS_NUM_USER_SGPR, /* GS limits */ @@ -223,7 +217,7 @@ enum { /* LLVM function parameter indices */ enum { - SI_NUM_RESOURCE_PARAMS = 4, + SI_NUM_RESOURCE_PARAMS = 3, /* PS only parameters */ SI_PARAM_ALPHA_REF = SI_NUM_RESOURCE_PARAMS, diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 9fd027d4cbd..5094023831d 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -110,8 +110,7 @@ struct si_shader_context { /* Parameter indices for LLVMGetParam. */ int param_rw_buffers; int param_const_and_shader_buffers; - int param_samplers; - int param_images; + int param_samplers_and_images; /* Common inputs for merged shaders. 
int param_merged_wave_info; int param_merged_scratch_offset; diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index 1e5842b818f..89f3f94f55c 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -180,7 +180,7 @@ image_fetch_rsrc( { struct si_shader_context *ctx = si_shader_context(bld_base); LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn, - ctx->param_images); + ctx->param_samplers_and_images); LLVMValueRef index; bool dcc_off = is_store; @@ -191,7 +191,8 @@ image_fetch_rsrc( unsigned images_writemask = info->images_store | info->images_atomic; - index = LLVMConstInt(ctx->i32, image->Register.Index, 0); + index = LLVMConstInt(ctx->i32, + si_get_image_slot(image->Register.Index), 0); if (images_writemask & (1 << image->Register.Index)) dcc_off = true; @@ -208,6 +209,9 @@ image_fetch_rsrc( index = si_get_bounded_indirect_index(ctx, &image->Indirect, image->Register.Index, SI_NUM_IMAGES); + index = LLVMBuildSub(ctx->gallivm.builder, + LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0), + index, ""); } *rsrc = load_image_desc(ctx, rsrc_ptr, index, target); @@ -1181,7 +1185,7 @@ static void tex_fetch_ptrs( LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr) { struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef list = LLVMGetParam(ctx->main_fn, ctx->param_samplers); + LLVMValueRef list = LLVMGetParam(ctx->main_fn, ctx->param_samplers_and_images); const struct tgsi_full_instruction *inst = emit_data->inst; const struct tgsi_full_src_register *reg; unsigned target = inst->Texture.Texture; @@ -1196,8 +1200,11 @@ static void tex_fetch_ptrs( &reg->Indirect, reg->Register.Index, SI_NUM_SAMPLERS); + index = LLVMBuildAdd(ctx->gallivm.builder, index, + LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), ""); } else { - index = LLVMConstInt(ctx->i32, reg->Register.Index, 0); + index = LLVMConstInt(ctx->i32, + 
si_get_sampler_slot(reg->Register.Index), 0); } if (target == TGSI_TEXTURE_BUFFER) diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 90d09720968..c4ef90372fb 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -196,8 +196,7 @@ enum { */ enum { SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS, - SI_SHADER_DESCS_SAMPLERS, - SI_SHADER_DESCS_IMAGES, + SI_SHADER_DESCS_SAMPLERS_AND_IMAGES, SI_NUM_SHADER_DESCS, }; @@ -229,7 +228,7 @@ struct si_descriptors { unsigned ce_offset; /* elements of the list that are changed and need to be uploaded */ - unsigned dirty_mask; + uint64_t dirty_mask; /* Whether CE is used to upload this descriptor array. */ bool uses_ce; @@ -387,4 +386,16 @@ static inline unsigned si_get_shaderbuf_slot(unsigned slot) return SI_NUM_SHADER_BUFFERS - 1 - slot; } +static inline unsigned si_get_sampler_slot(unsigned slot) +{ + /* samplers are in slots [8..39], ascending */ + return SI_NUM_IMAGES / 2 + slot; +} + +static inline unsigned si_get_image_slot(unsigned slot) +{ + /* images are in slots [15..0] (sampler slots [7..0]), descending */ + return SI_NUM_IMAGES - 1 - slot; +} + #endif -- 2.30.2