radeonsi: record which descriptor slots are used by shaders
author Marek Olšák <marek.olsak@amd.com>
Mon, 15 May 2017 22:03:52 +0000 (00:03 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Thu, 18 May 2017 20:15:02 +0000 (22:15 +0200)
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_compute.c
src/gallium/drivers/radeonsi/si_compute.h
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index 3a519a72d59ca14f0048ffb5bd3ced3070764bf6..22ef1116afee8b6e5de5c11461917d15a2e1b274 100644 (file)
@@ -100,6 +100,9 @@ static void si_create_compute_state_async(void *job, int thread_index)
        sel.tokens = program->tokens;
        sel.type = PIPE_SHADER_COMPUTE;
        sel.local_size = program->local_size;
+       si_get_active_slot_masks(&sel.info,
+                                &program->active_const_and_shader_buffers,
+                                &program->active_samplers_and_images);
 
        program->shader.selector = &sel;
        program->shader.is_monolithic = true;
index ed331047beb7eea1bf86f1b43e3b610da4cf97a2..764d708c4fb0a8f50895ca34de7d786af711e1ad 100644 (file)
@@ -34,6 +34,10 @@ struct si_compute {
        struct util_queue_fence ready;
        struct si_compiler_ctx_state compiler_ctx_state;
 
+       /* bitmasks of used descriptor slots */
+       uint32_t active_const_and_shader_buffers;
+       uint64_t active_samplers_and_images;
+
        unsigned ir_type;
        unsigned local_size;
        unsigned private_size;
index ffb7dc3b81d3746b0b9d3007850ddde79bf823c7..aab902b4c79d033a69b5f59d0304976e677fda21 100644 (file)
@@ -352,6 +352,10 @@ struct si_shader_selector {
        uint32_t        patch_outputs_written;  /* "get_unique_index_patch" bits */
 
        uint64_t        inputs_read;            /* "get_unique_index" bits */
+
+       /* bitmasks of used descriptor slots */
+       uint32_t        active_const_and_shader_buffers;
+       uint64_t        active_samplers_and_images;
 };
 
 /* Valid shader configurations:
index 9b506a8e3f730a1987dce8b69c9807590ad2ebee..f2003a5072e7315ee64f1cf7cfdae76b6429ed8f 100644 (file)
@@ -353,6 +353,9 @@ void si_init_shader_functions(struct si_context *sctx);
 bool si_init_shader_cache(struct si_screen *sscreen);
 void si_destroy_shader_cache(struct si_screen *sscreen);
 void si_init_shader_selector_async(void *job, int thread_index);
+void si_get_active_slot_masks(const struct tgsi_shader_info *info,
+                             uint32_t *const_and_shader_buffers,
+                             uint64_t *samplers_and_images);
 
 /* si_state_draw.c */
 void si_init_ia_multi_vgt_param_table(struct si_context *sctx);
index 313af85a1cb8bd81933695b5992f732d662daad9..45d996b6b6ca1bcbbeecd08dc7f1fa6608c965dc 100644 (file)
@@ -1903,6 +1903,30 @@ void si_init_shader_selector_async(void *job, int thread_index)
        }
 }
 
+/* Compute the used-descriptor-slot bitmasks from the resources declared in "info"; both pointers are outputs. */
+void si_get_active_slot_masks(const struct tgsi_shader_info *info,
+                             uint32_t *const_and_shader_buffers,
+                             uint64_t *samplers_and_images)
+{
+       unsigned start, num_shaderbufs, num_constbufs, num_images, num_samplers;
+
+       num_shaderbufs = util_last_bit(info->shader_buffers_declared);
+       num_constbufs = util_last_bit(info->const_buffers_declared);
+       /* two 8-byte images share one 16-byte slot, so the image count is aligned to 2 */
+       num_images = align(util_last_bit(info->images_declared), 2);
+       num_samplers = util_last_bit(info->samplers_declared);
+
+       /* The layout is: sb[last] ... sb[0], cb[0] ... cb[last]; the mask starts at sb[last]. */
+       start = si_get_shaderbuf_slot(num_shaderbufs - 1); /* NOTE(review): unsigned wrap if count is 0 - confirm intended */
+       *const_and_shader_buffers =
+               u_bit_consecutive(start, num_shaderbufs + num_constbufs);
+
+       /* The layout is: image[last] ... image[0], sampler[0] ... sampler[last]; image index and count are halved (2 images per slot). */
+       start = si_get_image_slot(num_images - 1) / 2; /* NOTE(review): unsigned wrap if count is 0 - confirm intended */
+       *samplers_and_images =
+               u_bit_consecutive64(start, num_images / 2 + num_samplers);
+}
+
 static void *si_create_shader_selector(struct pipe_context *ctx,
                                       const struct pipe_shader_state *state)
 {
@@ -1929,6 +1953,9 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
        tgsi_scan_shader(state->tokens, &sel->info);
        sel->type = sel->info.processor;
        p_atomic_inc(&sscreen->b.num_shaders_created);
+       si_get_active_slot_masks(&sel->info,
+                                &sel->active_const_and_shader_buffers,
+                                &sel->active_samplers_and_images);
 
        /* The prolog is a no-op if there are no inputs. */
        sel->vs_needs_prolog = sel->type == PIPE_SHADER_VERTEX &&