amd/rtld: layout and relocate LDS symbols
[mesa.git] / src / gallium / drivers / radeonsi / si_state_shaders.c
index f57e773090561c116ccfcb7904db88e8c7081b8d..e90884c898a9e5cd789c3bb9c4415dfe113f720c 100644 (file)
@@ -23,7 +23,7 @@
  */
 
 #include "si_build_pm4.h"
-#include "gfx9d.h"
+#include "sid.h"
 
 #include "compiler/nir/nir_serialize.h"
 #include "tgsi/tgsi_parse.h"
@@ -127,21 +127,21 @@ static uint32_t *read_chunk(uint32_t *ptr, void **data, unsigned *size)
 static void *si_get_shader_binary(struct si_shader *shader)
 {
        /* There is always a size of data followed by the data itself. */
-       unsigned relocs_size = shader->binary.reloc_count *
-                              sizeof(shader->binary.relocs[0]);
-       unsigned disasm_size = shader->binary.disasm_string ?
-                              strlen(shader->binary.disasm_string) + 1 : 0;
        unsigned llvm_ir_size = shader->binary.llvm_ir_string ?
                                strlen(shader->binary.llvm_ir_string) + 1 : 0;
+
+       /* Refuse to allocate overly large buffers and guard against integer
+        * overflow. */
+       if (shader->binary.elf_size > UINT_MAX / 4 ||
+           llvm_ir_size > UINT_MAX / 4)
+               return NULL;
+
        unsigned size =
                4 + /* total size */
                4 + /* CRC32 of the data below */
                align(sizeof(shader->config), 4) +
                align(sizeof(shader->info), 4) +
-               4 + align(shader->binary.code_size, 4) +
-               4 + align(shader->binary.rodata_size, 4) +
-               4 + align(relocs_size, 4) +
-               4 + align(disasm_size, 4) +
+               4 + align(shader->binary.elf_size, 4) +
                4 + align(llvm_ir_size, 4);
        void *buffer = CALLOC(1, size);
        uint32_t *ptr = (uint32_t*)buffer;
@@ -154,10 +154,7 @@ static void *si_get_shader_binary(struct si_shader *shader)
 
        ptr = write_data(ptr, &shader->config, sizeof(shader->config));
        ptr = write_data(ptr, &shader->info, sizeof(shader->info));
-       ptr = write_chunk(ptr, shader->binary.code, shader->binary.code_size);
-       ptr = write_chunk(ptr, shader->binary.rodata, shader->binary.rodata_size);
-       ptr = write_chunk(ptr, shader->binary.relocs, relocs_size);
-       ptr = write_chunk(ptr, shader->binary.disasm_string, disasm_size);
+       ptr = write_chunk(ptr, shader->binary.elf_buffer, shader->binary.elf_size);
        ptr = write_chunk(ptr, shader->binary.llvm_ir_string, llvm_ir_size);
        assert((char *)ptr - (char *)buffer == size);
 
@@ -175,6 +172,7 @@ static bool si_load_shader_binary(struct si_shader *shader, void *binary)
        uint32_t size = *ptr++;
        uint32_t crc32 = *ptr++;
        unsigned chunk_size;
+       unsigned elf_size;
 
        if (util_hash_crc32(ptr, size - 8) != crc32) {
                fprintf(stderr, "radeonsi: binary shader has invalid CRC32\n");
@@ -183,13 +181,9 @@ static bool si_load_shader_binary(struct si_shader *shader, void *binary)
 
        ptr = read_data(ptr, &shader->config, sizeof(shader->config));
        ptr = read_data(ptr, &shader->info, sizeof(shader->info));
-       ptr = read_chunk(ptr, (void**)&shader->binary.code,
-                        &shader->binary.code_size);
-       ptr = read_chunk(ptr, (void**)&shader->binary.rodata,
-                        &shader->binary.rodata_size);
-       ptr = read_chunk(ptr, (void**)&shader->binary.relocs, &chunk_size);
-       shader->binary.reloc_count = chunk_size / sizeof(shader->binary.relocs[0]);
-       ptr = read_chunk(ptr, (void**)&shader->binary.disasm_string, &chunk_size);
+       ptr = read_chunk(ptr, (void**)&shader->binary.elf_buffer,
+                        &elf_size);
+       shader->binary.elf_size = elf_size;
        ptr = read_chunk(ptr, (void**)&shader->binary.llvm_ir_string, &chunk_size);
 
        return true;
@@ -473,7 +467,7 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
        unsigned vgpr_comp_cnt;
        uint64_t va;
 
-       assert(sscreen->info.chip_class <= VI);
+       assert(sscreen->info.chip_class <= GFX8);
 
        pm4 = si_get_shader_pm4_state(shader);
        if (!pm4)
@@ -547,7 +541,7 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
                       S_00B428_FLOAT_MODE(shader->config.float_mode) |
                       S_00B428_LS_VGPR_COMP_CNT(ls_vgpr_comp_cnt));
 
-       if (sscreen->info.chip_class <= VI) {
+       if (sscreen->info.chip_class <= GFX8) {
                si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
                               shader->config.rsrc2);
        }
@@ -576,7 +570,7 @@ static void si_emit_shader_es(struct si_context *sctx)
                                           shader->vgt_vertex_reuse_block_cntl);
 
        if (initial_cdw != sctx->gfx_cs->current.cdw)
-               sctx->context_roll_counter++;
+               sctx->context_roll = true;
 }
 
 static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
@@ -587,7 +581,7 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
        uint64_t va;
        unsigned oc_lds_en;
 
-       assert(sscreen->info.chip_class <= VI);
+       assert(sscreen->info.chip_class <= GFX8);
 
        pm4 = si_get_shader_pm4_state(shader);
        if (!pm4)
@@ -825,7 +819,7 @@ static void si_emit_shader_gs(struct si_context *sctx)
        }
 
        if (initial_cdw != sctx->gfx_cs->current.cdw)
-               sctx->context_roll_counter++;
+               sctx->context_roll = true;
 }
 
 static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
@@ -973,7 +967,7 @@ static void si_emit_shader_vs(struct si_context *sctx)
                                   SI_TRACKED_VGT_PRIMITIVEID_EN,
                                   shader->ctx_reg.vs.vgt_primitiveid_en);
 
-       if (sctx->chip_class <= VI) {
+       if (sctx->chip_class <= GFX8) {
                radeon_opt_set_context_reg(sctx, R_028AB4_VGT_REUSE_OFF,
                                           SI_TRACKED_VGT_REUSE_OFF,
                                           shader->ctx_reg.vs.vgt_reuse_off);
@@ -1002,7 +996,7 @@ static void si_emit_shader_vs(struct si_context *sctx)
                                           shader->vgt_vertex_reuse_block_cntl);
 
        if (initial_cdw != sctx->gfx_cs->current.cdw)
-               sctx->context_roll_counter++;
+               sctx->context_roll = true;
 }
 
 /**
@@ -1052,7 +1046,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
                shader->ctx_reg.vs.vgt_primitiveid_en = 0;
        }
 
-       if (sscreen->info.chip_class <= VI) {
+       if (sscreen->info.chip_class <= GFX8) {
                /* Reuse needs to be set off if we write oViewport. */
                shader->ctx_reg.vs.vgt_reuse_off =
                                S_028AB4_REUSE_OFF(info->writes_viewport_index);
@@ -1194,7 +1188,7 @@ static void si_emit_shader_ps(struct si_context *sctx)
                                   shader->ctx_reg.ps.cb_shader_mask);
 
        if (initial_cdw != sctx->gfx_cs->current.cdw)
-               sctx->context_roll_counter++;
+               sctx->context_roll = true;
 }
 
 static void si_shader_ps(struct si_shader *shader)
@@ -1370,28 +1364,53 @@ static unsigned si_get_alpha_test_func(struct si_context *sctx)
        return PIPE_FUNC_ALWAYS;
 }
 
-static void si_shader_selector_key_vs(struct si_context *sctx,
-                                     struct si_shader_selector *vs,
-                                     struct si_shader_key *key,
-                                     struct si_vs_prolog_bits *prolog_key)
+void si_shader_selector_key_vs(struct si_context *sctx,
+                              struct si_shader_selector *vs,
+                              struct si_shader_key *key,
+                              struct si_vs_prolog_bits *prolog_key)
 {
        if (!sctx->vertex_elements ||
            vs->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS])
                return;
 
-       prolog_key->instance_divisor_is_one =
-               sctx->vertex_elements->instance_divisor_is_one;
-       prolog_key->instance_divisor_is_fetched =
-               sctx->vertex_elements->instance_divisor_is_fetched;
+       struct si_vertex_elements *elts = sctx->vertex_elements;
+
+       prolog_key->instance_divisor_is_one = elts->instance_divisor_is_one;
+       prolog_key->instance_divisor_is_fetched = elts->instance_divisor_is_fetched;
+       prolog_key->unpack_instance_id_from_vertex_id =
+               sctx->prim_discard_cs_instancing;
 
        /* Prefer a monolithic shader to allow scheduling divisions around
         * VBO loads. */
        if (prolog_key->instance_divisor_is_fetched)
                key->opt.prefer_mono = 1;
 
-       unsigned count = MIN2(vs->info.num_inputs,
-                             sctx->vertex_elements->count);
-       memcpy(key->mono.vs_fix_fetch, sctx->vertex_elements->fix_fetch, count);
+       unsigned count = MIN2(vs->info.num_inputs, elts->count);
+       unsigned count_mask = (1 << count) - 1;
+       unsigned fix = elts->fix_fetch_always & count_mask;
+       unsigned opencode = elts->fix_fetch_opencode & count_mask;
+
+       if (sctx->vertex_buffer_unaligned & elts->vb_alignment_check_mask) {
+               uint32_t mask = elts->fix_fetch_unaligned & count_mask;
+               while (mask) {
+                       unsigned i = u_bit_scan(&mask);
+                       unsigned log_hw_load_size = 1 + ((elts->hw_load_is_dword >> i) & 1);
+                       unsigned vbidx = elts->vertex_buffer_index[i];
+                       struct pipe_vertex_buffer *vb = &sctx->vertex_buffer[vbidx];
+                       unsigned align_mask = (1 << log_hw_load_size) - 1;
+                       if (vb->buffer_offset & align_mask ||
+                           vb->stride & align_mask) {
+                               fix |= 1 << i;
+                               opencode |= 1 << i;
+                       }
+               }
+       }
+
+       while (fix) {
+               unsigned i = u_bit_scan(&fix);
+               key->mono.vs_fix_fetch[i].bits = elts->fix_fetch[i];
+       }
+       key->mono.vs_fetch_opencode = opencode;
 }
 
 static void si_shader_selector_key_hw_vs(struct si_context *sctx,
@@ -1579,11 +1598,11 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
                    blend && blend->alpha_to_coverage)
                        key->part.ps.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
 
-               /* On SI and CIK except Hawaii, the CB doesn't clamp outputs
+               /* On GFX6 and GFX7 except Hawaii, the CB doesn't clamp outputs
                 * to the range supported by the type if a channel has less
                 * than 16 bits and the export format is 16_ABGR.
                 */
-               if (sctx->chip_class <= CIK && sctx->family != CHIP_HAWAII) {
+               if (sctx->chip_class <= GFX7 && sctx->family != CHIP_HAWAII) {
                        key->part.ps.epilog.color_is_int8 = sctx->framebuffer.color_is_int8;
                        key->part.ps.epilog.color_is_int10 = sctx->framebuffer.color_is_int10;
                }
@@ -1690,7 +1709,6 @@ static void si_build_shader_variant(struct si_shader *shader,
        struct si_screen *sscreen = sel->screen;
        struct ac_llvm_compiler *compiler;
        struct pipe_debug_callback *debug = &shader->compiler_ctx_state.debug;
-       int r;
 
        if (thread_index >= 0) {
                if (low_priority) {
@@ -1707,10 +1725,9 @@ static void si_build_shader_variant(struct si_shader *shader,
                compiler = shader->compiler_ctx_state.compiler;
        }
 
-       r = si_shader_create(sscreen, compiler, shader, debug);
-       if (unlikely(r)) {
-               PRINT_ERR("Failed to build shader variant (type=%u) %d\n",
-                        sel->type, r);
+       if (unlikely(!si_shader_create(sscreen, compiler, shader, debug))) {
+               PRINT_ERR("Failed to build shader variant (type=%u)\n",
+                         sel->type);
                shader->compilation_failed = true;
                return;
        }
@@ -1771,12 +1788,19 @@ static bool si_check_missing_main_part(struct si_screen *sscreen,
        return true;
 }
 
-/* Select the hw shader variant depending on the current state. */
-static int si_shader_select_with_key(struct si_screen *sscreen,
-                                    struct si_shader_ctx_state *state,
-                                    struct si_compiler_ctx_state *compiler_state,
-                                    struct si_shader_key *key,
-                                    int thread_index)
+/**
+ * Select a shader variant according to the shader key.
+ *
+ * \param optimized_or_none  If the key describes an optimized shader variant
+ *                           whose compilation isn't finished (or was only just
+ *                           queued), don't select any shader and return -1.
+ */
+int si_shader_select_with_key(struct si_screen *sscreen,
+                             struct si_shader_ctx_state *state,
+                             struct si_compiler_ctx_state *compiler_state,
+                             struct si_shader_key *key,
+                             int thread_index,
+                             bool optimized_or_none)
 {
        struct si_shader_selector *sel = state->cso;
        struct si_shader_selector *previous_stage_sel = NULL;
@@ -1792,6 +1816,9 @@ again:
                   memcmp(&current->key, key, sizeof(*key)) == 0)) {
                if (unlikely(!util_queue_fence_is_signalled(&current->ready))) {
                        if (current->is_optimized) {
+                               if (optimized_or_none)
+                                       return -1;
+
                                memset(&key->opt, 0, sizeof(key->opt));
                                goto current_not_ready;
                        }
@@ -1828,6 +1855,8 @@ current_not_ready:
                                 * shader so as not to cause a stall due to compilation.
                                 */
                                if (iter->is_optimized) {
+                                       if (optimized_or_none)
+                                               return -1;
                                        memset(&key->opt, 0, sizeof(key->opt));
                                        goto again;
                                }
@@ -1869,13 +1898,17 @@ current_not_ready:
                        util_queue_fence_wait(&previous_stage_sel->ready);
        }
 
-       /* Compile the main shader part if it doesn't exist. This can happen
-        * if the initial guess was wrong. */
        bool is_pure_monolithic =
                sscreen->use_monolithic_shaders ||
                memcmp(&key->mono, &zeroed.mono, sizeof(key->mono)) != 0;
 
-       if (!is_pure_monolithic) {
+       /* Compile the main shader part if it doesn't exist. This can happen
+        * if the initial guess was wrong.
+        *
+        * The prim discard CS doesn't need the main shader part.
+        */
+       if (!is_pure_monolithic &&
+           !key->opt.vs_as_prim_discard_cs) {
                bool ok;
 
                /* Make sure the main shader part is present. This is needed
@@ -1926,14 +1959,13 @@ current_not_ready:
                is_pure_monolithic ||
                memcmp(&key->opt, &zeroed.opt, sizeof(key->opt)) != 0;
 
+       /* The prim discard CS is always optimized. */
        shader->is_optimized =
-               !is_pure_monolithic &&
-               memcmp(&key->opt, &zeroed.opt, sizeof(key->opt)) != 0;
+               (!is_pure_monolithic || key->opt.vs_as_prim_discard_cs) &&
+                memcmp(&key->opt, &zeroed.opt, sizeof(key->opt)) != 0;
 
        /* If it's an optimized shader, compile it asynchronously. */
-       if (shader->is_optimized &&
-           !is_pure_monolithic &&
-           thread_index < 0) {
+       if (shader->is_optimized && thread_index < 0) {
                /* Compile it asynchronously. */
                util_queue_add_job(&sscreen->shader_compiler_queue_low_priority,
                                   shader, &shader->ready,
@@ -1956,6 +1988,8 @@ current_not_ready:
                if (sscreen->options.sync_compile)
                        util_queue_fence_wait(&shader->ready);
 
+               if (optimized_or_none)
+                       return -1;
                goto again;
        }
 
@@ -1992,7 +2026,7 @@ static int si_shader_select(struct pipe_context *ctx,
 
        si_shader_selector_key(ctx, state->cso, &key);
        return si_shader_select_with_key(sctx->screen, state, compiler_state,
-                                        &key, -1);
+                                        &key, -1, false);
 }
 
 static void si_parse_next_shader_property(const struct tgsi_shader_info *info,
@@ -2082,7 +2116,7 @@ static void si_init_shader_selector_async(void *job, int thread_index)
                if (ir_binary &&
                    si_shader_cache_load_shader(sscreen, ir_binary, shader)) {
                        mtx_unlock(&sscreen->shader_cache_mutex);
-                       si_shader_dump_stats_for_shader_db(shader, debug);
+                       si_shader_dump_stats_for_shader_db(sscreen, shader, debug);
                } else {
                        mtx_unlock(&sscreen->shader_cache_mutex);
 
@@ -2247,6 +2281,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 
                sel->nir = state->ir.nir;
 
+               si_nir_opts(sel->nir);
                si_nir_scan_shader(sel->nir, &sel->info);
                si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info);
        }
@@ -2275,6 +2310,15 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
                sel->info.uses_kill &&
                sctx->screen->debug_flags & DBG(FS_CORRECT_DERIVS_AFTER_KILL);
 
+       sel->prim_discard_cs_allowed =
+               sel->type == PIPE_SHADER_VERTEX &&
+               !sel->info.uses_bindless_images &&
+               !sel->info.uses_bindless_samplers &&
+               !sel->info.writes_memory &&
+               !sel->info.writes_viewport_index &&
+               !sel->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION] &&
+               !sel->so.num_outputs;
+
        /* Set which opcode uses which (i,j) pair. */
        if (sel->info.uses_persp_opcode_interp_centroid)
                sel->info.uses_persp_centroid = true;
@@ -2682,10 +2726,10 @@ static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
                switch (shader->selector->type) {
                case PIPE_SHADER_VERTEX:
                        if (shader->key.as_ls) {
-                               assert(sctx->chip_class <= VI);
+                               assert(sctx->chip_class <= GFX8);
                                si_pm4_delete_state(sctx, ls, shader->pm4);
                        } else if (shader->key.as_es) {
-                               assert(sctx->chip_class <= VI);
+                               assert(sctx->chip_class <= GFX8);
                                si_pm4_delete_state(sctx, es, shader->pm4);
                        } else {
                                si_pm4_delete_state(sctx, vs, shader->pm4);
@@ -2696,7 +2740,7 @@ static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
                        break;
                case PIPE_SHADER_TESS_EVAL:
                        if (shader->key.as_es) {
-                               assert(sctx->chip_class <= VI);
+                               assert(sctx->chip_class <= GFX8);
                                si_pm4_delete_state(sctx, es, shader->pm4);
                        } else {
                                si_pm4_delete_state(sctx, vs, shader->pm4);
@@ -2776,7 +2820,8 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx,
        unsigned j, offset, ps_input_cntl = 0;
 
        if (interpolate == TGSI_INTERPOLATE_CONSTANT ||
-           (interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade))
+           (interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade) ||
+           name == TGSI_SEMANTIC_PRIMID)
                ps_input_cntl |= S_028644_FLAT_SHADE(1);
 
        if (name == TGSI_SEMANTIC_PCOORD ||
@@ -2877,7 +2922,7 @@ static void si_emit_spi_map(struct si_context *sctx)
                                    sctx->tracked_regs.spi_ps_input_cntl, num_interp);
 
        if (initial_cdw != sctx->gfx_cs->current.cdw)
-               sctx->context_roll_counter++;
+               sctx->context_roll = true;
 }
 
 /**
@@ -2913,10 +2958,10 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
        unsigned num_se = sctx->screen->info.max_se;
        unsigned wave_size = 64;
        unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */
-       /* On SI-CI, the value comes from VGT_GS_VERTEX_REUSE = 16.
-        * On VI+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
+       /* On GFX6-GFX7, the value comes from VGT_GS_VERTEX_REUSE = 16.
+        * On GFX8+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
         */
-       unsigned gs_vertex_reuse = (sctx->chip_class >= VI ? 32 : 16) * num_se;
+       unsigned gs_vertex_reuse = (sctx->chip_class >= GFX8 ? 32 : 16) * num_se;
        unsigned alignment = 256 * num_se;
        /* The maximum size is 63.999 MB per SE. */
        unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
@@ -2943,7 +2988,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
         *
         * GFX9 doesn't have the ESGS ring.
         */
-       bool update_esgs = sctx->chip_class <= VI &&
+       bool update_esgs = sctx->chip_class <= GFX8 &&
                           esgs_ring_size &&
                           (!sctx->esgs_ring ||
                            sctx->esgs_ring->width0 < esgs_ring_size);
@@ -2981,9 +3026,9 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
        if (!pm4)
                return false;
 
-       if (sctx->chip_class >= CIK) {
+       if (sctx->chip_class >= GFX7) {
                if (sctx->esgs_ring) {
-                       assert(sctx->chip_class <= VI);
+                       assert(sctx->chip_class <= GFX8);
                        si_pm4_set_reg(pm4, R_030900_VGT_ESGS_RING_SIZE,
                                       sctx->esgs_ring->width0 / 256);
                }
@@ -3015,7 +3060,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
 
        /* Set ring bindings. */
        if (sctx->esgs_ring) {
-               assert(sctx->chip_class <= VI);
+               assert(sctx->chip_class <= GFX8);
                si_set_ring_buffer(sctx, SI_ES_RING_ESGS,
                                   sctx->esgs_ring, 0, sctx->esgs_ring->width0,
                                   true, true, 4, 64, 0);
@@ -3057,7 +3102,6 @@ static int si_update_scratch_buffer(struct si_context *sctx,
                                    struct si_shader *shader)
 {
        uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
-       int r;
 
        if (!shader)
                return 0;
@@ -3082,16 +3126,10 @@ static int si_update_scratch_buffer(struct si_context *sctx,
 
        assert(sctx->scratch_buffer);
 
-       if (shader->previous_stage)
-               si_shader_apply_scratch_relocs(shader->previous_stage, scratch_va);
-
-       si_shader_apply_scratch_relocs(shader, scratch_va);
-
        /* Replace the shader bo with a new bo that has the relocs applied. */
-       r = si_shader_binary_upload(sctx->screen, shader);
-       if (r) {
+       if (!si_shader_binary_upload(sctx->screen, shader, scratch_va)) {
                si_shader_unlock(shader);
-               return r;
+               return -1;
        }
 
        /* Update the shader state to use the new shader bo. */
@@ -3264,7 +3302,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
                             sctx->screen->tess_offchip_ring_size;
 
        /* Append these registers to the init config state. */
-       if (sctx->chip_class >= CIK) {
+       if (sctx->chip_class >= GFX7) {
                si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE,
                               S_030938_SIZE(sctx->screen->tess_factor_ring_size / 4));
                si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE,
@@ -3352,7 +3390,7 @@ bool si_update_shaders(struct si_context *sctx)
                }
 
                /* VS as LS */
-               if (sctx->chip_class <= VI) {
+               if (sctx->chip_class <= GFX8) {
                        r = si_shader_select(ctx, &sctx->vs_shader,
                                             &compiler_state);
                        if (r)
@@ -3384,7 +3422,7 @@ bool si_update_shaders(struct si_context *sctx)
 
                if (sctx->gs_shader.cso) {
                        /* TES as ES */
-                       if (sctx->chip_class <= VI) {
+                       if (sctx->chip_class <= GFX8) {
                                r = si_shader_select(ctx, &sctx->tes_shader,
                                                     &compiler_state);
                                if (r)
@@ -3400,7 +3438,7 @@ bool si_update_shaders(struct si_context *sctx)
                        si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4);
                }
        } else if (sctx->gs_shader.cso) {
-               if (sctx->chip_class <= VI) {
+               if (sctx->chip_class <= GFX8) {
                        /* VS as ES */
                        r = si_shader_select(ctx, &sctx->vs_shader,
                                             &compiler_state);
@@ -3433,7 +3471,7 @@ bool si_update_shaders(struct si_context *sctx)
                        return false;
        } else {
                si_pm4_bind_state(sctx, gs, NULL);
-               if (sctx->chip_class <= VI)
+               if (sctx->chip_class <= GFX8)
                        si_pm4_bind_state(sctx, es, NULL);
        }
 
@@ -3480,7 +3518,7 @@ bool si_update_shaders(struct si_context *sctx)
                        sctx->smoothing_enabled = sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing;
                        si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
 
-                       if (sctx->chip_class == SI)
+                       if (sctx->chip_class == GFX6)
                                si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
 
                        if (sctx->framebuffer.nr_samples <= 1)
@@ -3498,7 +3536,7 @@ bool si_update_shaders(struct si_context *sctx)
                        return false;
        }
 
-       if (sctx->chip_class >= CIK) {
+       if (sctx->chip_class >= GFX7) {
                if (si_pm4_state_enabled_and_changed(sctx, ls))
                        sctx->prefetch_L2_mask |= SI_PREFETCH_LS;
                else if (!sctx->queued.named.ls)