radeonsi: Enable VGPR spilling for all shader types v5
author: Tom Stellard <thomas.stellard@amd.com>
Wed, 10 Dec 2014 14:13:59 +0000 (09:13 -0500)
committer: Tom Stellard <thomas.stellard@amd.com>
Wed, 28 Jan 2015 21:03:47 +0000 (21:03 +0000)
v2:
  - Only emit the SPI_TMPRING_SIZE write once per packet.
  - Use context global scratch buffer.

v3:
  - Patch shaders using WRITE_DATA packet instead of map/unmap.
  - Emit ICACHE_FLUSH, CS_PARTIAL_FLUSH, PS_PARTIAL_FLUSH, and
    VS_PARTIAL_FLUSH when patching shaders.

v4:
  - Code cleanups.
  - Remove unnecessary multiplies.

v5:
  - Patch shaders in system memory and re-upload to vram.

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
src/gallium/drivers/radeonsi/si_compute.c
src/gallium/drivers/radeonsi/si_hw_context.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_shader.h
src/gallium/drivers/radeonsi/si_state_draw.c
src/gallium/drivers/radeonsi/si_state_shaders.c

index 12b91db07c13b88a0e30764aebddcc9d0dbcaca9..5009f6994430cd17c860752f65876d4fbd2e2b17 100644 (file)
 #define NUM_USER_SGPRS 4
 #endif
 
-static const char *scratch_rsrc_dword0_symbol =
-       "SCRATCH_RSRC_DWORD0";
-
-static const char *scratch_rsrc_dword1_symbol =
-       "SCRATCH_RSRC_DWORD1";
-
 struct si_compute {
        struct si_context *ctx;
 
@@ -67,8 +61,6 @@ struct si_compute {
 #endif
 };
 
-static void apply_scratch_relocs(const struct si_screen *sscreen,
-                       struct si_shader *shader, uint64_t scratch_va);
 static void init_scratch_buffer(struct si_context *sctx, struct si_compute *program)
 {
        unsigned scratch_bytes = 0;
@@ -83,7 +75,7 @@ static void init_scratch_buffer(struct si_context *sctx, struct si_compute *prog
                                program->shader.binary.global_symbol_offsets[i];
                unsigned scratch_bytes_needed;
 
-               si_shader_binary_read_config(&program->shader.binary,
+               si_shader_binary_read_config(sctx->screen,
                                                &program->shader, offset);
                scratch_bytes_needed = program->shader.scratch_bytes_per_wave;
                scratch_bytes = MAX2(scratch_bytes, scratch_bytes_needed);
@@ -106,8 +98,8 @@ static void init_scratch_buffer(struct si_context *sctx, struct si_compute *prog
        program->shader.scratch_bytes_per_wave = scratch_bytes;
 
        /* Patch the shader with the scratch buffer address. */
-       apply_scratch_relocs(sctx->screen, &program->shader, scratch_buffer_va);
-
+       si_shader_apply_scratch_relocs(sctx,
+                               &program->shader, scratch_buffer_va);
 }
 
 static void *si_create_compute_state(
@@ -231,30 +223,6 @@ static unsigned compute_num_waves_for_scratch(
        return scratch_waves;
 }
 
-static void apply_scratch_relocs(const struct si_screen *sscreen,
-                       struct si_shader *shader, uint64_t scratch_va) {
-       unsigned i;
-       uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff;
-       uint32_t scratch_rsrc_dword1 =
-               S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
-               |  S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64);
-
-       if (!shader->binary.reloc_count) {
-               return;
-       }
-
-       for (i = 0 ; i < shader->binary.reloc_count; i++) {
-               const struct radeon_shader_reloc *reloc = &shader->binary.relocs[i];
-               if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) {
-                       util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset,
-                               &scratch_rsrc_dword0, 4);
-               } else if (!strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
-                       util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset,
-                               &scratch_rsrc_dword1, 4);
-               }
-       }
-}
-
 static void si_launch_grid(
                struct pipe_context *ctx,
                const uint *block_layout, const uint *grid_layout,
@@ -299,7 +267,7 @@ static void si_launch_grid(
 
 #if HAVE_LLVM >= 0x0306
        /* Read the config information */
-       si_shader_binary_read_config(&program->shader.binary, shader, pc);
+       si_shader_binary_read_config(sctx->screen, shader, pc);
 #endif
 
        /* Upload the kernel arguments */
@@ -510,13 +478,15 @@ static void si_delete_compute_state(struct pipe_context *ctx, void* state){
                LLVMContextDispose(program->llvm_ctx);
        }
 #else
+       FREE(program->shader.binary.config);
+       FREE(program->shader.binary.rodata);
+       FREE(program->shader.binary.global_symbol_offsets);
        si_shader_destroy(ctx, &program->shader);
 #endif
 
        pipe_resource_reference(
                (struct pipe_resource **)&program->input_buffer, NULL);
 
-       radeon_shader_binary_free_members(&program->shader.binary, true);
        FREE(program);
 }
 
index 21c3ebfc4a5d29a297c91ddce3d0141d5ccc327f..1cacc2660c3194d657f32d6a736ca68096cb6783 100644 (file)
@@ -160,4 +160,5 @@ void si_begin_new_cs(struct si_context *ctx)
        ctx->last_prim = -1;
        ctx->last_multi_vgt_param = -1;
        ctx->last_rast_prim = -1;
+       ctx->emit_scratch_reloc = true;
 }
index e3f8fcf80322763b1866b2ac8f3579cffa92e01c..eb2b785de53b5a72a713a2f5ac45994cae8b866b 100644 (file)
@@ -46,6 +46,7 @@ static void si_destroy_context(struct pipe_context *context)
        pipe_resource_reference(&sctx->gsvs_ring, NULL);
        pipe_resource_reference(&sctx->null_const_buf.buffer, NULL);
        r600_resource_reference(&sctx->border_color_table, NULL);
+       r600_resource_reference(&sctx->scratch_buffer, NULL);
 
        si_pm4_free_state(sctx, sctx->init_config, ~0);
        si_pm4_delete_state(sctx, gs_rings, sctx->gs_rings);
@@ -158,6 +159,12 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
                                     sctx->null_const_buf.buffer->width0, 0, false);
        }
 
+       /* XXX: This is the maximum value allowed.  I'm not sure how to compute
+        * this for non-cs shaders.  Using the wrong value here can result in
+        * GPU lockups, but the maximum value seems to always work.
+        */
+       sctx->scratch_waves = 32 * sscreen->b.info.max_compute_units;
+
        return &sctx->b.b;
 fail:
        si_destroy_context(&sctx->b.b);
@@ -525,7 +532,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
        r600_target = radeon_llvm_get_r600_target(triple);
        sscreen->tm = LLVMCreateTargetMachine(r600_target, triple,
                                r600_get_llvm_processor_name(sscreen->b.family),
-                               "+DumpCode", LLVMCodeGenLevelDefault, LLVMRelocDefault,
+                               "+DumpCode,+vgpr-spilling", LLVMCodeGenLevelDefault, LLVMRelocDefault,
                                LLVMCodeModelDefault);
 #endif
        return &sscreen->b.b;
index 6144fb1b2f08dc504d4469bf378c0e0808d4f2f5..b88f1542c5e35e9d3d7eacfe2cff6907fee82d6c 100644 (file)
@@ -173,6 +173,7 @@ struct si_context {
        struct si_buffer_resources      const_buffers[SI_NUM_SHADERS];
        struct si_buffer_resources      rw_buffers[SI_NUM_SHADERS];
        struct si_textures_info         samplers[SI_NUM_SHADERS];
+       struct r600_resource            *scratch_buffer;
        struct r600_resource            *border_color_table;
        unsigned                        border_color_offset;
 
@@ -220,6 +221,11 @@ struct si_context {
        int                     last_prim;
        int                     last_multi_vgt_param;
        int                     last_rast_prim;
+
+       /* Scratch buffer */
+       boolean                 emit_scratch_reloc;
+       unsigned                scratch_waves;
+       unsigned                spi_tmpring_size;
 };
 
 /* si_blit.c */
index 571ce67c7c33c15d01d5a5639d4155f93f68996f..fb1419ddb4d5181c5315ca81d6ba8adcf811daa0 100644 (file)
@@ -32,6 +32,7 @@
 #include "gallivm/lp_bld_logic.h"
 #include "gallivm/lp_bld_arit.h"
 #include "gallivm/lp_bld_flow.h"
+#include "radeon/r600_cs.h"
 #include "radeon/radeon_llvm.h"
 #include "radeon/radeon_elf_util.h"
 #include "radeon/radeon_llvm_emit.h"
 
 #include <errno.h>
 
+static const char *scratch_rsrc_dword0_symbol =
+       "SCRATCH_RSRC_DWORD0";
+
+static const char *scratch_rsrc_dword1_symbol =
+       "SCRATCH_RSRC_DWORD1";
+
 struct si_shader_output_values
 {
        LLVMValueRef values[4];
@@ -2517,19 +2524,20 @@ static void preload_ring_buffers(struct si_shader_context *si_shader_ctx)
        }
 }
 
-void si_shader_binary_read_config(const struct radeon_shader_binary *binary,
+void si_shader_binary_read_config(const struct si_screen *sscreen,
                                struct si_shader *shader,
                                unsigned symbol_offset)
 {
        unsigned i;
        const unsigned char *config =
-               radeon_shader_binary_config_start(binary, symbol_offset);
+               radeon_shader_binary_config_start(&shader->binary,
+                                               symbol_offset);
 
        /* XXX: We may be able to emit some of these values directly rather than
         * extracting fields to be emitted later.
         */
 
-       for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
+       for (i = 0; i < shader->binary.config_size_per_symbol; i+= 8) {
                unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
                unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
                switch (reg) {
@@ -2549,6 +2557,7 @@ void si_shader_binary_read_config(const struct radeon_shader_binary *binary,
                case R_0286CC_SPI_PS_INPUT_ENA:
                        shader->spi_ps_input_ena = value;
                        break;
+               case R_0286E8_SPI_TMPRING_SIZE:
                case R_00B860_COMPUTE_TMPRING_SIZE:
                        /* WAVESIZE is in units of 256 dwords. */
                        shader->scratch_bytes_per_wave =
@@ -2562,6 +2571,29 @@ void si_shader_binary_read_config(const struct radeon_shader_binary *binary,
        }
 }
 
+/**
+ * Patch the scratch resource descriptor dwords into the shader binary.
+ *
+ * Walks shader->binary.relocs and, for every relocation whose name is
+ * SCRATCH_RSRC_DWORD0 or SCRATCH_RSRC_DWORD1, stores the matching dword
+ * at reloc->offset inside shader->binary.code.  Relocations with any other
+ * name are left untouched.
+ *
+ * \param sctx        context (not used by the current implementation)
+ * \param shader      shader whose binary.code is patched in place
+ * \param scratch_va  address of the scratch buffer
+ */
+void si_shader_apply_scratch_relocs(struct si_context *sctx,
+                       struct si_shader *shader,
+                       uint64_t scratch_va)
+{
+       /* Low 32 bits of the scratch address. */
+       uint32_t rsrc_lo = scratch_va & 0xffffffff;
+       /* Upper address bits merged with the stride field. */
+       uint32_t rsrc_hi =
+               S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
+               S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64);
+       unsigned idx;
+
+       for (idx = 0; idx < shader->binary.reloc_count; ++idx) {
+               const struct radeon_shader_reloc *entry =
+                                       &shader->binary.relocs[idx];
+               uint32_t *dword;
+
+               /* Pick the value this relocation asks for, if any. */
+               if (strcmp(scratch_rsrc_dword0_symbol, entry->name) == 0)
+                       dword = &rsrc_lo;
+               else if (strcmp(scratch_rsrc_dword1_symbol, entry->name) == 0)
+                       dword = &rsrc_hi;
+               else
+                       continue;
+
+               util_memcpy_cpu_to_le32(shader->binary.code + entry->offset,
+                                       dword, 4);
+       }
+}
+
 int si_shader_binary_read(struct si_screen *sscreen,
                        struct si_shader *shader,
                        const struct radeon_shader_binary *binary)
@@ -2582,7 +2614,7 @@ int si_shader_binary_read(struct si_screen *sscreen,
                }
        }
 
-       si_shader_binary_read_config(binary, shader, 0);
+       si_shader_binary_read_config(sscreen, shader, 0);
 
        /* copy new shader */
        code_size = binary->code_size + binary->rodata_size;
@@ -2610,18 +2642,24 @@ int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
                                                        LLVMModuleRef mod)
 {
        int r = 0;
-       struct radeon_shader_binary binary;
        bool dump = r600_can_dump_shader(&sscreen->b,
                        shader->selector ? shader->selector->tokens : NULL);
-       memset(&binary, 0, sizeof(binary));
-       r = radeon_llvm_compile(mod, &binary,
+       r = radeon_llvm_compile(mod, &shader->binary,
                r600_get_llvm_processor_name(sscreen->b.family), dump, sscreen->tm);
 
        if (r) {
                return r;
        }
-       r = si_shader_binary_read(sscreen, shader, &binary);
-       radeon_shader_binary_free_members(&binary, true);
+       r = si_shader_binary_read(sscreen, shader, &shader->binary);
+
+       FREE(shader->binary.config);
+       FREE(shader->binary.rodata);
+       FREE(shader->binary.global_symbol_offsets);
+       if (shader->scratch_bytes_per_wave == 0) {
+               FREE(shader->binary.code);
+               FREE(shader->binary.relocs);
+               memset(&shader->binary, 0, sizeof(shader->binary));
+       }
        return r;
 }
 
@@ -2861,4 +2899,7 @@ void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader)
                r600_resource_reference(&shader->scratch_bo, NULL);
 
        r600_resource_reference(&shader->bo, NULL);
+
+       FREE(shader->binary.code);
+       FREE(shader->binary.relocs);
 }
index 6def5c7131380e75eaf5e9901793c445abec504b..1d7efc23f593cf4d897fd8ee4f790a4f212993b9 100644 (file)
@@ -34,6 +34,7 @@
 #include "si_state.h"
 
 struct radeon_shader_binary;
+struct radeon_shader_reloc;
 
 #define SI_SGPR_RW_BUFFERS     0  /* rings (& stream-out, VS only) */
 #define SI_SGPR_CONST          2
@@ -186,7 +187,10 @@ void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader);
 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
 int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader,
                const struct radeon_shader_binary *binary);
-void si_shader_binary_read_config(const struct radeon_shader_binary *binary,
+void si_shader_apply_scratch_relocs(struct si_context *sctx,
+                       struct si_shader *shader,
+                       uint64_t scratch_va);
+void si_shader_binary_read_config(const struct si_screen *sscreen,
                                struct si_shader *shader,
                                unsigned symbol_offset);
 
index cd4880bfd2db9dc6c6980c12ed1b1effdcb01b21..9446eca09c86f6041a47313d948d0ec6f3a71e10 100644 (file)
@@ -571,6 +571,20 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
        if (sctx->b.flags)
                sctx->atoms.s.cache_flush->dirty = true;
 
+       if (sctx->emit_scratch_reloc) {
+               struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
+               r600_write_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
+                               sctx->spi_tmpring_size);
+
+               if (sctx->scratch_buffer) {
+                        r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
+                               sctx->scratch_buffer, RADEON_USAGE_READWRITE,
+                               RADEON_PRIO_SHADER_RESOURCE_RW);
+
+               }
+               sctx->emit_scratch_reloc = false;
+       }
+
        si_need_cs_space(sctx, 0, TRUE);
 
        /* Emit states. */
index 3249bcc5aa645a1349193dc1329e54d29c41eadc..a40926800ddd3f51c080cf8d1628da9270569559 100644 (file)
@@ -67,7 +67,8 @@ static void si_shader_es(struct si_shader *shader)
                       S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
                       S_00B328_DX10_CLAMP(shader->dx10_clamp_mode));
        si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
-                      S_00B32C_USER_SGPR(num_user_sgprs));
+                      S_00B32C_USER_SGPR(num_user_sgprs) |
+                      S_00B32C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
 }
 
 static void si_shader_gs(struct si_shader *shader)
@@ -136,7 +137,8 @@ static void si_shader_gs(struct si_shader *shader)
                       S_00B228_SGPRS((num_sgprs - 1) / 8) |
                       S_00B228_DX10_CLAMP(shader->dx10_clamp_mode));
        si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
-                      S_00B22C_USER_SGPR(num_user_sgprs));
+                      S_00B22C_USER_SGPR(num_user_sgprs) |
+                      S_00B22C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
 }
 
 static void si_shader_vs(struct si_shader *shader)
@@ -216,7 +218,8 @@ static void si_shader_vs(struct si_shader *shader)
                       S_00B12C_SO_BASE1_EN(!!shader->selector->so.stride[1]) |
                       S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) |
                       S_00B12C_SO_BASE3_EN(!!shader->selector->so.stride[3]) |
-                      S_00B12C_SO_EN(!!shader->selector->so.num_outputs));
+                      S_00B12C_SO_EN(!!shader->selector->so.num_outputs) |
+                      S_00B12C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
        if (window_space)
                si_pm4_set_reg(pm4, R_028818_PA_CL_VTE_CNTL,
                               S_028818_VTX_XY_FMT(1) | S_028818_VTX_Z_FMT(1));
@@ -311,7 +314,8 @@ static void si_shader_ps(struct si_shader *shader)
                       S_00B028_DX10_CLAMP(shader->dx10_clamp_mode));
        si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
                       S_00B02C_EXTRA_LDS_SIZE(shader->lds_size) |
-                      S_00B02C_USER_SGPR(num_user_sgprs));
+                      S_00B02C_USER_SGPR(num_user_sgprs) |
+                      S_00B32C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0));
 }
 
 static void si_shader_init_pm4_state(struct si_shader *shader)
@@ -710,6 +714,119 @@ static void si_init_gs_rings(struct si_context *sctx)
                           false, false, 0, 0);
 }
 
+/**
+ * Re-point the current variant of \p sel at the context's scratch buffer.
+ *
+ * When the variant uses scratch space and is not already bound to
+ * sctx->scratch_buffer, the scratch relocations are applied to the
+ * system-memory copy of the code, the code is uploaded into a freshly
+ * created shader bo and the shader's pm4 state is re-initialized.
+ *
+ * @returns 1 if \p sel has been updated to use a new scratch buffer and 0
+ *          otherwise.
+ */
+static unsigned si_update_scratch_buffer(struct si_context *sctx,
+                                   struct si_shader_selector *sel)
+{
+       struct si_shader *shader;
+       uint64_t scratch_va;
+       unsigned char *ptr;
+
+       if (!sel)
+               return 0;
+
+       shader = sel->current;
+
+       /* This shader doesn't need a scratch buffer */
+       if (shader->scratch_bytes_per_wave == 0)
+               return 0;
+
+       /* This shader is already configured to use the current
+        * scratch buffer. */
+       if (shader->scratch_bo == sctx->scratch_buffer)
+               return 0;
+
+       assert(sctx->scratch_buffer);
+
+       /* Read the address only after the assert: none of the early returns
+        * above need it, and dereferencing a missing buffer first would
+        * crash before the assert could report the problem. */
+       scratch_va = sctx->scratch_buffer->gpu_address;
+
+       si_shader_apply_scratch_relocs(sctx, shader, scratch_va);
+
+       /* Replace the shader bo with a new bo that has the relocs applied. */
+       r600_resource_reference(&shader->bo, NULL);
+       shader->bo = si_resource_create_custom(&sctx->screen->b.b, PIPE_USAGE_IMMUTABLE,
+                                              shader->binary.code_size);
+       ptr = sctx->screen->b.ws->buffer_map(shader->bo->cs_buf, NULL, PIPE_TRANSFER_WRITE);
+       util_memcpy_cpu_to_le32(ptr, shader->binary.code, shader->binary.code_size);
+       sctx->screen->b.ws->buffer_unmap(shader->bo->cs_buf);
+
+       /* Update the shader state to use the new shader bo. */
+       si_shader_init_pm4_state(shader);
+
+       /* Remember which scratch buffer this variant was patched for. */
+       r600_resource_reference(&shader->scratch_bo, sctx->scratch_buffer);
+
+       return 1;
+}
+
+/* Returns width0 of the context's scratch buffer, or 0 when the context
+ * has no scratch buffer yet. */
+static unsigned si_get_current_scratch_buffer_size(struct si_context *sctx)
+{
+       return sctx->scratch_buffer ? sctx->scratch_buffer->b.b.width0 : 0;
+}
+
+/* Returns scratch_bytes_per_wave of the selector's current shader variant,
+ * or 0 when no selector is given. */
+static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_context *sctx,
+                                       struct si_shader_selector *sel)
+{
+       return sel ? sel->current->scratch_bytes_per_wave : 0;
+}
+
+/* Returns the largest per-wave scratch requirement among the context's
+ * ps, gs and vs shader selectors. */
+static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
+{
+       unsigned ps_bytes =
+               si_get_scratch_buffer_bytes_per_wave(sctx, sctx->ps_shader);
+       unsigned gs_bytes =
+               si_get_scratch_buffer_bytes_per_wave(sctx, sctx->gs_shader);
+       unsigned vs_bytes =
+               si_get_scratch_buffer_bytes_per_wave(sctx, sctx->vs_shader);
+
+       return MAX3(ps_bytes, gs_bytes, vs_bytes);
+}
+
+/**
+ * Recompute the scratch setup for the ps, gs and vs shaders of \p sctx.
+ *
+ * Grows sctx->scratch_buffer when the shaders need more than it currently
+ * holds, re-patches every shader that is not yet using the current scratch
+ * buffer (re-binding its pm4 state), and stores the new SPI_TMPRING_SIZE
+ * value in sctx->spi_tmpring_size.
+ *
+ * sctx->emit_scratch_reloc is raised whenever the scratch buffer was
+ * replaced or the register value changed, so that the new buffer and
+ * value are picked up by the code that emits them while that flag is set.
+ */
+static void si_update_spi_tmpring_size(struct si_context *sctx)
+{
+       unsigned current_scratch_buffer_size =
+               si_get_current_scratch_buffer_size(sctx);
+       unsigned scratch_bytes_per_wave =
+               si_get_max_scratch_bytes_per_wave(sctx);
+       unsigned scratch_needed_size = scratch_bytes_per_wave *
+               sctx->scratch_waves;
+       unsigned spi_tmpring_size;
+
+       if (scratch_needed_size > 0) {
+
+               if (scratch_needed_size > current_scratch_buffer_size) {
+                       /* Create a bigger scratch buffer */
+                       pipe_resource_reference(
+                                       (struct pipe_resource**)&sctx->scratch_buffer,
+                                       NULL);
+
+                       sctx->scratch_buffer =
+                                       si_resource_create_custom(&sctx->screen->b.b,
+                                       PIPE_USAGE_DEFAULT, scratch_needed_size);
+
+                       /* The replacement buffer still has to be added to
+                        * the command stream. */
+                       sctx->emit_scratch_reloc = true;
+               }
+
+               /* Update the shaders, so they are using the latest scratch.  The
+                * scratch buffer may have been changed since these shaders were
+                * last used, so we still need to try to update them, even if
+                * they require scratch buffers smaller than the current size.
+                */
+               if (si_update_scratch_buffer(sctx, sctx->ps_shader))
+                       si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
+               if (si_update_scratch_buffer(sctx, sctx->gs_shader))
+                       si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
+               if (si_update_scratch_buffer(sctx, sctx->vs_shader))
+                       si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
+       }
+
+       /* The LLVM shader backend should be reporting aligned scratch_sizes. */
+       assert((scratch_needed_size & ~0x3FF) == scratch_needed_size &&
+               "scratch size should already be aligned correctly.");
+
+       spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) |
+                               S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
+
+       /* Without raising the flag here a value that changes in the middle
+        * of a command stream would not be written until the next one. */
+       if (spi_tmpring_size != sctx->spi_tmpring_size) {
+               sctx->spi_tmpring_size = spi_tmpring_size;
+               sctx->emit_scratch_reloc = true;
+       }
+}
+
 void si_update_shaders(struct si_context *sctx)
 {
        struct pipe_context *ctx = (struct pipe_context*)sctx;
@@ -786,6 +903,11 @@ void si_update_shaders(struct si_context *sctx)
                si_update_spi_map(sctx);
        }
 
+       if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
+           si_pm4_state_changed(sctx, gs)) {
+               si_update_spi_tmpring_size(sctx);
+       }
+
        if (sctx->ps_db_shader_control != sctx->ps_shader->current->db_shader_control) {
                sctx->ps_db_shader_control = sctx->ps_shader->current->db_shader_control;
                sctx->db_render_state.dirty = true;