From 53c2ef36da79df926d5c3eac81f60ebe6009dda8 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?=
Date: Tue, 16 May 2017 00:03:52 +0200
Subject: [PATCH] radeonsi: record which descriptor slots are used by shaders
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Reviewed-by: Nicolai Hähnle
---
 src/gallium/drivers/radeonsi/si_compute.c     |  3 +++
 src/gallium/drivers/radeonsi/si_compute.h     |  4 +++
 src/gallium/drivers/radeonsi/si_shader.h      |  4 +++
 src/gallium/drivers/radeonsi/si_state.h       |  3 +++
 .../drivers/radeonsi/si_state_shaders.c       | 27 +++++++++++++++++++
 5 files changed, 41 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 3a519a72d59..22ef1116afe 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -100,6 +100,9 @@ static void si_create_compute_state_async(void *job, int thread_index)
 	sel.tokens = program->tokens;
 	sel.type = PIPE_SHADER_COMPUTE;
 	sel.local_size = program->local_size;
+	si_get_active_slot_masks(&sel.info,
+				 &program->active_const_and_shader_buffers,
+				 &program->active_samplers_and_images);
 
 	program->shader.selector = &sel;
 	program->shader.is_monolithic = true;
diff --git a/src/gallium/drivers/radeonsi/si_compute.h b/src/gallium/drivers/radeonsi/si_compute.h
index ed331047beb..764d708c4fb 100644
--- a/src/gallium/drivers/radeonsi/si_compute.h
+++ b/src/gallium/drivers/radeonsi/si_compute.h
@@ -34,6 +34,10 @@ struct si_compute {
 	struct util_queue_fence ready;
 	struct si_compiler_ctx_state compiler_ctx_state;
 
+	/* bitmasks of used descriptor slots, filled in by si_get_active_slot_masks() */
+	uint32_t active_const_and_shader_buffers;
+	uint64_t active_samplers_and_images;
+
 	unsigned ir_type;
 	unsigned local_size;
 	unsigned private_size;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index ffb7dc3b81d..aab902b4c79 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -352,6 +352,10 @@ struct si_shader_selector {
 	uint32_t	patch_outputs_written;	/* "get_unique_index_patch" bits */
 
 	uint64_t	inputs_read;		/* "get_unique_index" bits */
+
+	/* bitmasks of used descriptor slots, filled in by si_get_active_slot_masks() */
+	uint32_t	active_const_and_shader_buffers;
+	uint64_t	active_samplers_and_images;
 };
 
 /* Valid shader configurations:
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 9b506a8e3f7..f2003a5072e 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -353,6 +353,9 @@ void si_init_shader_functions(struct si_context *sctx);
 bool si_init_shader_cache(struct si_screen *sscreen);
 void si_destroy_shader_cache(struct si_screen *sscreen);
 void si_init_shader_selector_async(void *job, int thread_index);
+void si_get_active_slot_masks(const struct tgsi_shader_info *info,
+			      uint32_t *const_and_shader_buffers,
+			      uint64_t *samplers_and_images);
 
 /* si_state_draw.c */
 void si_init_ia_multi_vgt_param_table(struct si_context *sctx);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 313af85a1cb..45d996b6b6c 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1903,6 +1903,30 @@ void si_init_shader_selector_async(void *job, int thread_index)
 	}
 }
 
+/* Return descriptor slot usage masks from the given shader info. Both masks are written through the output pointers. */
+void si_get_active_slot_masks(const struct tgsi_shader_info *info,
+			      uint32_t *const_and_shader_buffers, /* out: shader + const buffer slots */
+			      uint64_t *samplers_and_images) /* out: image + sampler slots */
+{
+	unsigned start, num_shaderbufs, num_constbufs, num_images, num_samplers;
+
+	num_shaderbufs = util_last_bit(info->shader_buffers_declared);
+	num_constbufs = util_last_bit(info->const_buffers_declared);
+	/* two 8-byte images share one 16-byte slot, so the image count is aligned to 2 */
+	num_images = align(util_last_bit(info->images_declared), 2);
+	num_samplers = util_last_bit(info->samplers_declared);
+
+	/* The layout is: sb[last] ... sb[0], cb[0] ... cb[last] */
+	start = si_get_shaderbuf_slot(num_shaderbufs - 1); /* NOTE(review): "- 1" wraps to UINT_MAX when num_shaderbufs == 0; presumably the helper still yields the cb[0] slot - confirm */
+	*const_and_shader_buffers =
+		u_bit_consecutive(start, num_shaderbufs + num_constbufs);
+
+	/* The layout is: image[last] ... image[0], sampler[0] ... sampler[last] */
+	start = si_get_image_slot(num_images - 1) / 2; /* slot index is halved to match the two-images-per-slot packing; NOTE(review): "- 1" wraps when num_images == 0 - confirm */
+	*samplers_and_images =
+		u_bit_consecutive64(start, num_images / 2 + num_samplers);
+}
+
 static void *si_create_shader_selector(struct pipe_context *ctx,
 				       const struct pipe_shader_state *state)
 {
@@ -1929,6 +1953,9 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 	tgsi_scan_shader(state->tokens, &sel->info);
 	sel->type = sel->info.processor;
 	p_atomic_inc(&sscreen->b.num_shaders_created);
+	si_get_active_slot_masks(&sel->info,
+				 &sel->active_const_and_shader_buffers,
+				 &sel->active_samplers_and_images);
 
 	/* The prolog is a no-op if there are no inputs. */
 	sel->vs_needs_prolog = sel->type == PIPE_SHADER_VERTEX &&
-- 
2.30.2