radeonsi: don't load block dimensions into SGPRs if they are not variable
author Marek Olšák <marek.olsak@amd.com>
Wed, 25 Jul 2018 05:35:11 +0000 (01:35 -0400)
committer Marek Olšák <marek.olsak@amd.com>
Tue, 7 Aug 2018 17:52:34 +0000 (13:52 -0400)
src/gallium/drivers/radeonsi/si_compute.c
src/gallium/drivers/radeonsi/si_compute.h
src/gallium/drivers/radeonsi/si_shader.c

index 2349be95849a2d13d1077c8c8e4d7d36e142cdaa..ea6fa3e999dd59d62cf41dfaa6804dd82ad41a03 100644 (file)
@@ -123,10 +123,10 @@ static void si_create_compute_state_async(void *job, int thread_index)
        program->shader.selector = &sel;
        program->shader.is_monolithic = true;
        program->uses_grid_size = sel.info.uses_grid_size;
-       program->uses_block_size = sel.info.uses_block_size;
        program->uses_bindless_samplers = sel.info.uses_bindless_samplers;
        program->uses_bindless_images = sel.info.uses_bindless_images;
-       program->variable_group_size =
+       program->reads_variable_block_size =
+               sel.info.uses_block_size &&
                sel.info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0;
 
        void *ir_binary = si_get_ir_binary(&sel);
@@ -159,7 +159,7 @@ static void si_create_compute_state_async(void *job, int thread_index)
                bool scratch_enabled = shader->config.scratch_bytes_per_wave > 0;
                unsigned user_sgprs = SI_NUM_RESOURCE_SGPRS +
                                      (sel.info.uses_grid_size ? 3 : 0) +
-                                     (sel.info.uses_block_size ? 3 : 0);
+                                     (program->reads_variable_block_size ? 3 : 0);
 
                shader->config.rsrc1 =
                        S_00B848_VGPRS((shader->config.num_vgprs - 1) / 4) |
@@ -744,7 +744,7 @@ static void si_setup_tgsi_grid(struct si_context *sctx,
                        radeon_emit(cs, info->grid[1]);
                        radeon_emit(cs, info->grid[2]);
                }
-               if (program->variable_group_size && program->uses_block_size) {
+               if (program->reads_variable_block_size) {
                        radeon_set_sh_reg_seq(cs, block_size_reg, 3);
                        radeon_emit(cs, info->block[0]);
                        radeon_emit(cs, info->block[1]);
index 3a4cdea25ef06d664d3e164e8c712ccace988508..ef8b4aec4df3a665e0d42ed42c2659d4149d561a 100644 (file)
@@ -53,11 +53,10 @@ struct si_compute {
 
        struct pipe_resource *global_buffers[MAX_GLOBAL_BUFFERS];
        unsigned use_code_object_v2 : 1;
-       unsigned variable_group_size : 1;
        unsigned uses_grid_size:1;
-       unsigned uses_block_size:1;
        unsigned uses_bindless_samplers:1;
        unsigned uses_bindless_images:1;
+       bool reads_variable_block_size;
 };
 
 void si_destroy_compute(struct si_compute *program);
index 3afdca52ea66fe639701be1b322e12f5e89abd22..30e150eb6c089dd506076d5f8e06f51bd06ad87e 100644 (file)
@@ -4997,7 +4997,8 @@ static void create_function(struct si_shader_context *ctx)
                declare_per_stage_desc_pointers(ctx, &fninfo, true);
                if (shader->selector->info.uses_grid_size)
                        add_arg_assign(&fninfo, ARG_SGPR, v3i32, &ctx->abi.num_work_groups);
-               if (shader->selector->info.uses_block_size)
+               if (shader->selector->info.uses_block_size &&
+                   shader->selector->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0)
                        ctx->param_block_size = add_arg(&fninfo, ARG_SGPR, v3i32);
 
                for (i = 0; i < 3; i++) {