radeonsi: fix scratch buffer WAVESIZE setting leading to corruption
authorMarek Olšák <marek.olsak@amd.com>
Mon, 19 Aug 2019 17:15:54 +0000 (13:15 -0400)
committerMarek Olšák <marek.olsak@amd.com>
Tue, 27 Aug 2019 20:52:32 +0000 (16:52 -0400)
Cc: 19.2 19.1 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
src/gallium/drivers/radeonsi/si_compute.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state_shaders.c

index 32f934237b0245f20a58bf28887fe97316c98679..3acc1c69a0a809ff5f19416be17784fa90a472a6 100644 (file)
@@ -532,9 +532,13 @@ static bool si_switch_compute_shader(struct si_context *sctx,
        COMPUTE_DBG(sctx->screen, "COMPUTE_PGM_RSRC1: 0x%08x "
                "COMPUTE_PGM_RSRC2: 0x%08x\n", config->rsrc1, config->rsrc2);
 
+       sctx->max_seen_compute_scratch_bytes_per_wave =
+               MAX2(sctx->max_seen_compute_scratch_bytes_per_wave,
+                    config->scratch_bytes_per_wave);
+
        radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
                  S_00B860_WAVES(sctx->scratch_waves)
-                    | S_00B860_WAVESIZE(config->scratch_bytes_per_wave >> 10));
+                    | S_00B860_WAVESIZE(sctx->max_seen_compute_scratch_bytes_per_wave >> 10));
 
        sctx->cs_shader_state.emitted_program = program;
        sctx->cs_shader_state.offset = offset;
index b0495f0841b7b8880a5ed433240e116cd26c4481..86aee2e0824894cf1c65af308fcc0e0afed84c56 100644 (file)
@@ -1078,6 +1078,8 @@ struct si_context {
        struct si_resource      *scratch_buffer;
        unsigned                scratch_waves;
        unsigned                spi_tmpring_size;
+       unsigned                max_seen_scratch_bytes_per_wave;
+       unsigned                max_seen_compute_scratch_bytes_per_wave;
 
        struct si_resource      *compute_scratch_buffer;
 
index 5d64f39b2877175974753c3c077199f0b2b58334..d6fa1f1858219af9a8778ad4adf28f2d23f41271 100644 (file)
@@ -3625,11 +3625,6 @@ static int si_update_scratch_buffer(struct si_context *sctx,
        return 1;
 }
 
-static unsigned si_get_current_scratch_buffer_size(struct si_context *sctx)
-{
-       return sctx->scratch_buffer ? sctx->scratch_buffer->b.b.width0 : 0;
-}
-
 static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_shader *shader)
 {
        return shader ? shader->config.scratch_bytes_per_wave : 0;
@@ -3644,23 +3639,6 @@ static struct si_shader *si_get_tcs_current(struct si_context *sctx)
                                      sctx->fixed_func_tcs_shader.current;
 }
 
-static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
-{
-       unsigned bytes = 0;
-
-       bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader.current));
-       bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader.current));
-       bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader.current));
-       bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader.current));
-
-       if (sctx->tes_shader.cso) {
-               struct si_shader *tcs = si_get_tcs_current(sctx);
-
-               bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(tcs));
-       }
-       return bytes;
-}
-
 static bool si_update_scratch_relocs(struct si_context *sctx)
 {
        struct si_shader *tcs = si_get_tcs_current(sctx);
@@ -3722,16 +3700,40 @@ static bool si_update_scratch_relocs(struct si_context *sctx)
 
 static bool si_update_spi_tmpring_size(struct si_context *sctx)
 {
-       unsigned current_scratch_buffer_size =
-               si_get_current_scratch_buffer_size(sctx);
-       unsigned scratch_bytes_per_wave =
-               si_get_max_scratch_bytes_per_wave(sctx);
-       unsigned scratch_needed_size = scratch_bytes_per_wave *
-               sctx->scratch_waves;
+       /* SPI_TMPRING_SIZE.WAVESIZE must be constant for each scratch buffer.
+        * There are 2 cases to handle:
+        *
+        * - If the current needed size is less than the maximum seen size,
+        *   use the maximum seen size, so that WAVESIZE remains the same.
+        *
+        * - If the current needed size is greater than the maximum seen size,
+        *   the scratch buffer is reallocated, so we can increase WAVESIZE.
+        *
+        * Shaders that set SCRATCH_EN=0 don't allocate scratch space.
+        * Otherwise, the number of waves that can use scratch is
+        * SPI_TMPRING_SIZE.WAVES.
+        */
+       unsigned bytes = 0;
+
+       bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader.current));
+       bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader.current));
+       bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader.current));
+
+       if (sctx->tes_shader.cso) {
+               bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader.current));
+               bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(si_get_tcs_current(sctx)));
+       }
+
+       sctx->max_seen_scratch_bytes_per_wave =
+               MAX2(sctx->max_seen_scratch_bytes_per_wave, bytes);
+
+       unsigned scratch_needed_size =
+               sctx->max_seen_scratch_bytes_per_wave * sctx->scratch_waves;
        unsigned spi_tmpring_size;
 
        if (scratch_needed_size > 0) {
-               if (scratch_needed_size > current_scratch_buffer_size) {
+               if (!sctx->scratch_buffer ||
+                   scratch_needed_size > sctx->scratch_buffer->b.b.width0) {
                        /* Create a bigger scratch buffer */
                        si_resource_reference(&sctx->scratch_buffer, NULL);
 
@@ -3758,7 +3760,7 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
                "scratch size should already be aligned correctly.");
 
        spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) |
-                          S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
+                          S_0286E8_WAVESIZE(sctx->max_seen_scratch_bytes_per_wave >> 10);
        if (spi_tmpring_size != sctx->spi_tmpring_size) {
                sctx->spi_tmpring_size = spi_tmpring_size;
                si_mark_atom_dirty(sctx, &sctx->atoms.s.scratch_state);