radeonsi: apply a multi-wave workgroup SPI bug workaround to affected CIK chips
authorMarek Olšák <marek.olsak@amd.com>
Tue, 29 Nov 2016 18:25:03 +0000 (19:25 +0100)
committerMarek Olšák <marek.olsak@amd.com>
Thu, 1 Dec 2016 01:16:51 +0000 (02:16 +0100)
All codepaths are handled except for clover.

Cc: 13.0 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_compute.c
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state_draw.c

index 91f1b0a7420bf86bd5b2b3a8edd926667544c9e1..9d83cb3a7361891593843e973023824c5f37c6a9 100644 (file)
@@ -348,6 +348,7 @@ static bool si_switch_compute_shader(struct si_context *sctx,
                        lds_blocks += align(program->local_size, 512) >> 9;
                }
 
+               /* TODO: use si_multiwave_lds_size_workaround */
                assert(lds_blocks <= 0xFF);
 
                config->rsrc2 &= C_00B84C_LDS_SIZE;
index b19c61e99d27d3829a43c0c059526da406b279b0..80c063aaeeccb5ff9946fa6a73ff069bf14db20b 100644 (file)
@@ -8198,11 +8198,31 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
        return true;
 }
 
-static void si_fix_num_sgprs(struct si_shader *shader)
+void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
+                                     unsigned *lds_size)
+{
+       /* SPI barrier management bug:
+        *   Make sure we have at least 4k of LDS in use to avoid the bug.
+        *   It applies to workgroup sizes of more than one wavefront.
+        */
+       if (sscreen->b.family == CHIP_BONAIRE ||
+           sscreen->b.family == CHIP_KABINI ||
+           sscreen->b.family == CHIP_MULLINS)
+               *lds_size = MAX2(*lds_size, 8);
+}
+
+static void si_fix_resource_usage(struct si_screen *sscreen,
+                                 struct si_shader *shader)
 {
        unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* VCC */
 
        shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs);
+
+       if (shader->selector->type == PIPE_SHADER_COMPUTE &&
+           si_get_max_workgroup_size(shader) > 64) {
+               si_multiwave_lds_size_workaround(sscreen,
+                                                &shader->config.lds_size);
+       }
 }
 
 int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
@@ -8297,7 +8317,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
                }
        }
 
-       si_fix_num_sgprs(shader);
+       si_fix_resource_usage(sscreen, shader);
        si_shader_dump(sscreen, shader, debug, sel->info.processor,
                       stderr);
 
index d4bc47b3878733375fc3ff733f5bf2b24d5d646b..129e571aa732d30a3ac0cf7abf49cabdd2674128 100644 (file)
@@ -547,6 +547,8 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
 void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
                    struct pipe_debug_callback *debug, unsigned processor,
                    FILE *f);
+void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
+                                     unsigned *lds_size);
 void si_shader_apply_scratch_relocs(struct si_context *sctx,
                        struct si_shader *shader,
                        struct si_shader_config *config,
index e904164fafad7c675f51c0c583dda72c915d1724..10073ef9d742f58461fcf47f60574d228654e8ec 100644 (file)
@@ -176,11 +176,13 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 
        if (sctx->b.chip_class >= CIK) {
                assert(lds_size <= 65536);
-               ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 512) / 512);
+               lds_size = align(lds_size, 512) / 512;
        } else {
                assert(lds_size <= 32768);
-               ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 256) / 256);
+               lds_size = align(lds_size, 256) / 256;
        }
+       si_multiwave_lds_size_workaround(sctx->screen, &lds_size);
+       ls_rsrc2 |= S_00B52C_LDS_SIZE(lds_size);
 
        /* Due to a hw bug, RSRC2_LS must be written twice with another
         * LS register written in between. */