amd/rtld: layout and relocate LDS symbols

[mesa.git] / src / gallium / drivers / radeonsi / si_state_shaders.c
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c

index f57e773090561c116ccfcb7904db88e8c7081b8d..e90884c898a9e5cd789c3bb9c4415dfe113f720c 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -23,7 +23,7 @@
   */
  
  #include "si_build_pm4.h"
-#include "gfx9d.h"
+#include "sid.h"
  
  #include "compiler/nir/nir_serialize.h"
  #include "tgsi/tgsi_parse.h"
@@ -127,21 +127,21 @@ static uint32_t *read_chunk(uint32_t *ptr, void **data, unsigned *size)
  static void *si_get_shader_binary(struct si_shader *shader)
  {
         /* There is always a size of data followed by the data itself. */
-       unsigned relocs_size = shader->binary.reloc_count *
-                              sizeof(shader->binary.relocs[0]);
-       unsigned disasm_size = shader->binary.disasm_string ?
-                              strlen(shader->binary.disasm_string) + 1 : 0;
         unsigned llvm_ir_size = shader->binary.llvm_ir_string ?
                                 strlen(shader->binary.llvm_ir_string) + 1 : 0;
+
+       /* Refuse to allocate overly large buffers and guard against integer
+        * overflow. */
+       if (shader->binary.elf_size > UINT_MAX / 4 ||
+           llvm_ir_size > UINT_MAX / 4)
+               return NULL;
+
         unsigned size =
                 4 + /* total size */
                 4 + /* CRC32 of the data below */
                 align(sizeof(shader->config), 4) +
                 align(sizeof(shader->info), 4) +
-               4 + align(shader->binary.code_size, 4) +
-               4 + align(shader->binary.rodata_size, 4) +
-               4 + align(relocs_size, 4) +
-               4 + align(disasm_size, 4) +
+               4 + align(shader->binary.elf_size, 4) +
                 4 + align(llvm_ir_size, 4);
         void *buffer = CALLOC(1, size);
         uint32_t *ptr = (uint32_t*)buffer;
@@ -154,10 +154,7 @@ static void *si_get_shader_binary(struct si_shader *shader)
  
         ptr = write_data(ptr, &shader->config, sizeof(shader->config));
         ptr = write_data(ptr, &shader->info, sizeof(shader->info));
-       ptr = write_chunk(ptr, shader->binary.code, shader->binary.code_size);
-       ptr = write_chunk(ptr, shader->binary.rodata, shader->binary.rodata_size);
-       ptr = write_chunk(ptr, shader->binary.relocs, relocs_size);
-       ptr = write_chunk(ptr, shader->binary.disasm_string, disasm_size);
+       ptr = write_chunk(ptr, shader->binary.elf_buffer, shader->binary.elf_size);
         ptr = write_chunk(ptr, shader->binary.llvm_ir_string, llvm_ir_size);
         assert((char *)ptr - (char *)buffer == size);
  
@@ -175,6 +172,7 @@ static bool si_load_shader_binary(struct si_shader *shader, void *binary)
         uint32_t size = *ptr++;
         uint32_t crc32 = *ptr++;
         unsigned chunk_size;
+       unsigned elf_size;
  
         if (util_hash_crc32(ptr, size - 8) != crc32) {
                 fprintf(stderr, "radeonsi: binary shader has invalid CRC32\n");
@@ -183,13 +181,9 @@ static bool si_load_shader_binary(struct si_shader *shader, void *binary)
  
         ptr = read_data(ptr, &shader->config, sizeof(shader->config));
         ptr = read_data(ptr, &shader->info, sizeof(shader->info));
-       ptr = read_chunk(ptr, (void**)&shader->binary.code,
-                        &shader->binary.code_size);
-       ptr = read_chunk(ptr, (void**)&shader->binary.rodata,
-                        &shader->binary.rodata_size);
-       ptr = read_chunk(ptr, (void**)&shader->binary.relocs, &chunk_size);
-       shader->binary.reloc_count = chunk_size / sizeof(shader->binary.relocs[0]);
-       ptr = read_chunk(ptr, (void**)&shader->binary.disasm_string, &chunk_size);
+       ptr = read_chunk(ptr, (void**)&shader->binary.elf_buffer,
+                        &elf_size);
+       shader->binary.elf_size = elf_size;
         ptr = read_chunk(ptr, (void**)&shader->binary.llvm_ir_string, &chunk_size);
  
         return true;
@@ -473,7 +467,7 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
         unsigned vgpr_comp_cnt;
         uint64_t va;
  
-       assert(sscreen->info.chip_class <= VI);
+       assert(sscreen->info.chip_class <= GFX8);
  
         pm4 = si_get_shader_pm4_state(shader);
         if (!pm4)
@@ -547,7 +541,7 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
                        S_00B428_FLOAT_MODE(shader->config.float_mode) |
                        S_00B428_LS_VGPR_COMP_CNT(ls_vgpr_comp_cnt));
  
-       if (sscreen->info.chip_class <= VI) {
+       if (sscreen->info.chip_class <= GFX8) {
                 si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
                                shader->config.rsrc2);
         }
@@ -576,7 +570,7 @@ static void si_emit_shader_es(struct si_context *sctx)
                                            shader->vgt_vertex_reuse_block_cntl);
  
         if (initial_cdw != sctx->gfx_cs->current.cdw)
-               sctx->context_roll_counter++;
+               sctx->context_roll = true;
  }
  
  static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
@@ -587,7 +581,7 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
         uint64_t va;
         unsigned oc_lds_en;
  
-       assert(sscreen->info.chip_class <= VI);
+       assert(sscreen->info.chip_class <= GFX8);
  
         pm4 = si_get_shader_pm4_state(shader);
         if (!pm4)
@@ -825,7 +819,7 @@ static void si_emit_shader_gs(struct si_context *sctx)
         }
  
         if (initial_cdw != sctx->gfx_cs->current.cdw)
-               sctx->context_roll_counter++;
+               sctx->context_roll = true;
  }
  
  static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
@@ -973,7 +967,7 @@ static void si_emit_shader_vs(struct si_context *sctx)
                                    SI_TRACKED_VGT_PRIMITIVEID_EN,
                                    shader->ctx_reg.vs.vgt_primitiveid_en);
  
-       if (sctx->chip_class <= VI) {
+       if (sctx->chip_class <= GFX8) {
                 radeon_opt_set_context_reg(sctx, R_028AB4_VGT_REUSE_OFF,
                                            SI_TRACKED_VGT_REUSE_OFF,
                                            shader->ctx_reg.vs.vgt_reuse_off);
@@ -1002,7 +996,7 @@ static void si_emit_shader_vs(struct si_context *sctx)
                                            shader->vgt_vertex_reuse_block_cntl);
  
         if (initial_cdw != sctx->gfx_cs->current.cdw)
-               sctx->context_roll_counter++;
+               sctx->context_roll = true;
  }
  
  /**
@@ -1052,7 +1046,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
                 shader->ctx_reg.vs.vgt_primitiveid_en = 0;
         }
  
-       if (sscreen->info.chip_class <= VI) {
+       if (sscreen->info.chip_class <= GFX8) {
                 /* Reuse needs to be set off if we write oViewport. */
                 shader->ctx_reg.vs.vgt_reuse_off =
                                 S_028AB4_REUSE_OFF(info->writes_viewport_index);
@@ -1194,7 +1188,7 @@ static void si_emit_shader_ps(struct si_context *sctx)
                                    shader->ctx_reg.ps.cb_shader_mask);
  
         if (initial_cdw != sctx->gfx_cs->current.cdw)
-               sctx->context_roll_counter++;
+               sctx->context_roll = true;
  }
  
  static void si_shader_ps(struct si_shader *shader)
@@ -1370,28 +1364,53 @@ static unsigned si_get_alpha_test_func(struct si_context *sctx)
         return PIPE_FUNC_ALWAYS;
  }
  
-static void si_shader_selector_key_vs(struct si_context *sctx,
-                                     struct si_shader_selector *vs,
-                                     struct si_shader_key *key,
-                                     struct si_vs_prolog_bits *prolog_key)
+void si_shader_selector_key_vs(struct si_context *sctx,
+                              struct si_shader_selector *vs,
+                              struct si_shader_key *key,
+                              struct si_vs_prolog_bits *prolog_key)
  {
         if (!sctx->vertex_elements ||
             vs->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS])
                 return;
  
-       prolog_key->instance_divisor_is_one =
-               sctx->vertex_elements->instance_divisor_is_one;
-       prolog_key->instance_divisor_is_fetched =
-               sctx->vertex_elements->instance_divisor_is_fetched;
+       struct si_vertex_elements *elts = sctx->vertex_elements;
+
+       prolog_key->instance_divisor_is_one = elts->instance_divisor_is_one;
+       prolog_key->instance_divisor_is_fetched = elts->instance_divisor_is_fetched;
+       prolog_key->unpack_instance_id_from_vertex_id =
+               sctx->prim_discard_cs_instancing;
  
         /* Prefer a monolithic shader to allow scheduling divisions around
          * VBO loads. */
         if (prolog_key->instance_divisor_is_fetched)
                 key->opt.prefer_mono = 1;
  
-       unsigned count = MIN2(vs->info.num_inputs,
-                             sctx->vertex_elements->count);
-       memcpy(key->mono.vs_fix_fetch, sctx->vertex_elements->fix_fetch, count);
+       unsigned count = MIN2(vs->info.num_inputs, elts->count);
+       unsigned count_mask = (1 << count) - 1;
+       unsigned fix = elts->fix_fetch_always & count_mask;
+       unsigned opencode = elts->fix_fetch_opencode & count_mask;
+
+       if (sctx->vertex_buffer_unaligned & elts->vb_alignment_check_mask) {
+               uint32_t mask = elts->fix_fetch_unaligned & count_mask;
+               while (mask) {
+                       unsigned i = u_bit_scan(&mask);
+                       unsigned log_hw_load_size = 1 + ((elts->hw_load_is_dword >> i) & 1);
+                       unsigned vbidx = elts->vertex_buffer_index[i];
+                       struct pipe_vertex_buffer *vb = &sctx->vertex_buffer[vbidx];
+                       unsigned align_mask = (1 << log_hw_load_size) - 1;
+                       if (vb->buffer_offset & align_mask ||
+                           vb->stride & align_mask) {
+                               fix |= 1 << i;
+                               opencode |= 1 << i;
+                       }
+               }
+       }
+
+       while (fix) {
+               unsigned i = u_bit_scan(&fix);
+               key->mono.vs_fix_fetch[i].bits = elts->fix_fetch[i];
+       }
+       key->mono.vs_fetch_opencode = opencode;
  }
  
  static void si_shader_selector_key_hw_vs(struct si_context *sctx,
@@ -1579,11 +1598,11 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
                     blend && blend->alpha_to_coverage)
                         key->part.ps.epilog.spi_shader_col_format |= V_028710_SPI_SHADER_32_AR;
  
-               /* On SI and CIK except Hawaii, the CB doesn't clamp outputs
+               /* On GFX6 and GFX7 except Hawaii, the CB doesn't clamp outputs
                  * to the range supported by the type if a channel has less
                  * than 16 bits and the export format is 16_ABGR.
                  */
-               if (sctx->chip_class <= CIK && sctx->family != CHIP_HAWAII) {
+               if (sctx->chip_class <= GFX7 && sctx->family != CHIP_HAWAII) {
                         key->part.ps.epilog.color_is_int8 = sctx->framebuffer.color_is_int8;
                         key->part.ps.epilog.color_is_int10 = sctx->framebuffer.color_is_int10;
                 }
@@ -1690,7 +1709,6 @@ static void si_build_shader_variant(struct si_shader *shader,
         struct si_screen *sscreen = sel->screen;
         struct ac_llvm_compiler *compiler;
         struct pipe_debug_callback *debug = &shader->compiler_ctx_state.debug;
-       int r;
  
         if (thread_index >= 0) {
                 if (low_priority) {
@@ -1707,10 +1725,9 @@ static void si_build_shader_variant(struct si_shader *shader,
                 compiler = shader->compiler_ctx_state.compiler;
         }
  
-       r = si_shader_create(sscreen, compiler, shader, debug);
-       if (unlikely(r)) {
-               PRINT_ERR("Failed to build shader variant (type=%u) %d\n",
-                        sel->type, r);
+       if (unlikely(!si_shader_create(sscreen, compiler, shader, debug))) {
+               PRINT_ERR("Failed to build shader variant (type=%u)\n",
+                         sel->type);
                 shader->compilation_failed = true;
                 return;
         }
@@ -1771,12 +1788,19 @@ static bool si_check_missing_main_part(struct si_screen *sscreen,
         return true;
  }
  
-/* Select the hw shader variant depending on the current state. */
-static int si_shader_select_with_key(struct si_screen *sscreen,
-                                    struct si_shader_ctx_state *state,
-                                    struct si_compiler_ctx_state *compiler_state,
-                                    struct si_shader_key *key,
-                                    int thread_index)
+/**
+ * Select a shader variant according to the shader key.
+ *
+ * \param optimized_or_none  If the key describes an optimized shader variant and
+ *                           its compilation isn't finished, don't select any
+ *                           shader (no retry with key->opt cleared); return -1.
+ */
+int si_shader_select_with_key(struct si_screen *sscreen,
+                             struct si_shader_ctx_state *state,
+                             struct si_compiler_ctx_state *compiler_state,
+                             struct si_shader_key *key,
+                             int thread_index,
+                             bool optimized_or_none)
  {
         struct si_shader_selector *sel = state->cso;
         struct si_shader_selector *previous_stage_sel = NULL;
@@ -1792,6 +1816,9 @@ again:
                    memcmp(&current->key, key, sizeof(*key)) == 0)) {
                 if (unlikely(!util_queue_fence_is_signalled(&current->ready))) {
                         if (current->is_optimized) {
+                               if (optimized_or_none)
+                                       return -1;
+
                                 memset(&key->opt, 0, sizeof(key->opt));
                                 goto current_not_ready;
                         }
@@ -1828,6 +1855,8 @@ current_not_ready:
                                  * shader so as not to cause a stall due to compilation.
                                  */
                                 if (iter->is_optimized) {
+                                       if (optimized_or_none)
+                                               return -1;
                                         memset(&key->opt, 0, sizeof(key->opt));
                                         goto again;
                                 }
@@ -1869,13 +1898,17 @@ current_not_ready:
                         util_queue_fence_wait(&previous_stage_sel->ready);
         }
  
-       /* Compile the main shader part if it doesn't exist. This can happen
-        * if the initial guess was wrong. */
         bool is_pure_monolithic =
                 sscreen->use_monolithic_shaders ||
                 memcmp(&key->mono, &zeroed.mono, sizeof(key->mono)) != 0;
  
-       if (!is_pure_monolithic) {
+       /* Compile the main shader part if it doesn't exist. This can happen
+        * if the initial guess was wrong.
+        *
+        * The prim discard CS doesn't need the main shader part.
+        */
+       if (!is_pure_monolithic &&
+           !key->opt.vs_as_prim_discard_cs) {
                 bool ok;
  
                 /* Make sure the main shader part is present. This is needed
@@ -1926,14 +1959,13 @@ current_not_ready:
                 is_pure_monolithic ||
                 memcmp(&key->opt, &zeroed.opt, sizeof(key->opt)) != 0;
  
+       /* The prim discard CS is always optimized. */
         shader->is_optimized =
-               !is_pure_monolithic &&
-               memcmp(&key->opt, &zeroed.opt, sizeof(key->opt)) != 0;
+               (!is_pure_monolithic || key->opt.vs_as_prim_discard_cs) &&
+                memcmp(&key->opt, &zeroed.opt, sizeof(key->opt)) != 0;
  
         /* If it's an optimized shader, compile it asynchronously. */
-       if (shader->is_optimized &&
-           !is_pure_monolithic &&
-           thread_index < 0) {
+       if (shader->is_optimized && thread_index < 0) {
                 /* Compile it asynchronously. */
                 util_queue_add_job(&sscreen->shader_compiler_queue_low_priority,
                                    shader, &shader->ready,
@@ -1956,6 +1988,8 @@ current_not_ready:
                 if (sscreen->options.sync_compile)
                         util_queue_fence_wait(&shader->ready);
  
+               if (optimized_or_none)
+                       return -1;
                 goto again;
         }
  
@@ -1992,7 +2026,7 @@ static int si_shader_select(struct pipe_context *ctx,
  
         si_shader_selector_key(ctx, state->cso, &key);
         return si_shader_select_with_key(sctx->screen, state, compiler_state,
-                                        &key, -1);
+                                        &key, -1, false);
  }
  
  static void si_parse_next_shader_property(const struct tgsi_shader_info *info,
@@ -2082,7 +2116,7 @@ static void si_init_shader_selector_async(void *job, int thread_index)
                 if (ir_binary &&
                     si_shader_cache_load_shader(sscreen, ir_binary, shader)) {
                         mtx_unlock(&sscreen->shader_cache_mutex);
-                       si_shader_dump_stats_for_shader_db(shader, debug);
+                       si_shader_dump_stats_for_shader_db(sscreen, shader, debug);
                 } else {
                         mtx_unlock(&sscreen->shader_cache_mutex);
  
@@ -2247,6 +2281,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
  
                 sel->nir = state->ir.nir;
  
+               si_nir_opts(sel->nir);
                 si_nir_scan_shader(sel->nir, &sel->info);
                 si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info);
         }
@@ -2275,6 +2310,15 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
                 sel->info.uses_kill &&
                 sctx->screen->debug_flags & DBG(FS_CORRECT_DERIVS_AFTER_KILL);
  
+       sel->prim_discard_cs_allowed =
+               sel->type == PIPE_SHADER_VERTEX &&
+               !sel->info.uses_bindless_images &&
+               !sel->info.uses_bindless_samplers &&
+               !sel->info.writes_memory &&
+               !sel->info.writes_viewport_index &&
+               !sel->info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION] &&
+               !sel->so.num_outputs;
+
         /* Set which opcode uses which (i,j) pair. */
         if (sel->info.uses_persp_opcode_interp_centroid)
                 sel->info.uses_persp_centroid = true;
@@ -2682,10 +2726,10 @@ static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
                 switch (shader->selector->type) {
                 case PIPE_SHADER_VERTEX:
                         if (shader->key.as_ls) {
-                               assert(sctx->chip_class <= VI);
+                               assert(sctx->chip_class <= GFX8);
                                 si_pm4_delete_state(sctx, ls, shader->pm4);
                         } else if (shader->key.as_es) {
-                               assert(sctx->chip_class <= VI);
+                               assert(sctx->chip_class <= GFX8);
                                 si_pm4_delete_state(sctx, es, shader->pm4);
                         } else {
                                 si_pm4_delete_state(sctx, vs, shader->pm4);
@@ -2696,7 +2740,7 @@ static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
                         break;
                 case PIPE_SHADER_TESS_EVAL:
                         if (shader->key.as_es) {
-                               assert(sctx->chip_class <= VI);
+                               assert(sctx->chip_class <= GFX8);
                                 si_pm4_delete_state(sctx, es, shader->pm4);
                         } else {
                                 si_pm4_delete_state(sctx, vs, shader->pm4);
@@ -2776,7 +2820,8 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx,
         unsigned j, offset, ps_input_cntl = 0;
  
         if (interpolate == TGSI_INTERPOLATE_CONSTANT ||
-           (interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade))
+           (interpolate == TGSI_INTERPOLATE_COLOR && sctx->flatshade) ||
+           name == TGSI_SEMANTIC_PRIMID)
                 ps_input_cntl |= S_028644_FLAT_SHADE(1);
  
         if (name == TGSI_SEMANTIC_PCOORD ||
@@ -2877,7 +2922,7 @@ static void si_emit_spi_map(struct si_context *sctx)
                                     sctx->tracked_regs.spi_ps_input_cntl, num_interp);
  
         if (initial_cdw != sctx->gfx_cs->current.cdw)
-               sctx->context_roll_counter++;
+               sctx->context_roll = true;
  }
  
  /**
@@ -2913,10 +2958,10 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
         unsigned num_se = sctx->screen->info.max_se;
         unsigned wave_size = 64;
         unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */
-       /* On SI-CI, the value comes from VGT_GS_VERTEX_REUSE = 16.
-        * On VI+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
+       /* On GFX6-GFX7, the value comes from VGT_GS_VERTEX_REUSE = 16.
+        * On GFX8+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
          */
-       unsigned gs_vertex_reuse = (sctx->chip_class >= VI ? 32 : 16) * num_se;
+       unsigned gs_vertex_reuse = (sctx->chip_class >= GFX8 ? 32 : 16) * num_se;
         unsigned alignment = 256 * num_se;
         /* The maximum size is 63.999 MB per SE. */
         unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
@@ -2943,7 +2988,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
          *
          * GFX9 doesn't have the ESGS ring.
          */
-       bool update_esgs = sctx->chip_class <= VI &&
+       bool update_esgs = sctx->chip_class <= GFX8 &&
                            esgs_ring_size &&
                            (!sctx->esgs_ring ||
                             sctx->esgs_ring->width0 < esgs_ring_size);
@@ -2981,9 +3026,9 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
         if (!pm4)
                 return false;
  
-       if (sctx->chip_class >= CIK) {
+       if (sctx->chip_class >= GFX7) {
                 if (sctx->esgs_ring) {
-                       assert(sctx->chip_class <= VI);
+                       assert(sctx->chip_class <= GFX8);
                         si_pm4_set_reg(pm4, R_030900_VGT_ESGS_RING_SIZE,
                                        sctx->esgs_ring->width0 / 256);
                 }
@@ -3015,7 +3060,7 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
  
         /* Set ring bindings. */
         if (sctx->esgs_ring) {
-               assert(sctx->chip_class <= VI);
+               assert(sctx->chip_class <= GFX8);
                 si_set_ring_buffer(sctx, SI_ES_RING_ESGS,
                                    sctx->esgs_ring, 0, sctx->esgs_ring->width0,
                                    true, true, 4, 64, 0);
@@ -3057,7 +3102,6 @@ static int si_update_scratch_buffer(struct si_context *sctx,
                                     struct si_shader *shader)
  {
         uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
-       int r;
  
         if (!shader)
                 return 0;
@@ -3082,16 +3126,10 @@ static int si_update_scratch_buffer(struct si_context *sctx,
  
         assert(sctx->scratch_buffer);
  
-       if (shader->previous_stage)
-               si_shader_apply_scratch_relocs(shader->previous_stage, scratch_va);
-
-       si_shader_apply_scratch_relocs(shader, scratch_va);
-
         /* Replace the shader bo with a new bo that has the relocs applied. */
-       r = si_shader_binary_upload(sctx->screen, shader);
-       if (r) {
+       if (!si_shader_binary_upload(sctx->screen, shader, scratch_va)) {
                 si_shader_unlock(shader);
-               return r;
+               return -1;
         }
  
         /* Update the shader state to use the new shader bo. */
@@ -3264,7 +3302,7 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
                              sctx->screen->tess_offchip_ring_size;
  
         /* Append these registers to the init config state. */
-       if (sctx->chip_class >= CIK) {
+       if (sctx->chip_class >= GFX7) {
                 si_pm4_set_reg(sctx->init_config, R_030938_VGT_TF_RING_SIZE,
                                S_030938_SIZE(sctx->screen->tess_factor_ring_size / 4));
                 si_pm4_set_reg(sctx->init_config, R_030940_VGT_TF_MEMORY_BASE,
@@ -3352,7 +3390,7 @@ bool si_update_shaders(struct si_context *sctx)
                 }
  
                 /* VS as LS */
-               if (sctx->chip_class <= VI) {
+               if (sctx->chip_class <= GFX8) {
                         r = si_shader_select(ctx, &sctx->vs_shader,
                                              &compiler_state);
                         if (r)
@@ -3384,7 +3422,7 @@ bool si_update_shaders(struct si_context *sctx)
  
                 if (sctx->gs_shader.cso) {
                         /* TES as ES */
-                       if (sctx->chip_class <= VI) {
+                       if (sctx->chip_class <= GFX8) {
                                 r = si_shader_select(ctx, &sctx->tes_shader,
                                                      &compiler_state);
                                 if (r)
@@ -3400,7 +3438,7 @@ bool si_update_shaders(struct si_context *sctx)
                         si_pm4_bind_state(sctx, vs, sctx->tes_shader.current->pm4);
                 }
         } else if (sctx->gs_shader.cso) {
-               if (sctx->chip_class <= VI) {
+               if (sctx->chip_class <= GFX8) {
                         /* VS as ES */
                         r = si_shader_select(ctx, &sctx->vs_shader,
                                              &compiler_state);
@@ -3433,7 +3471,7 @@ bool si_update_shaders(struct si_context *sctx)
                         return false;
         } else {
                 si_pm4_bind_state(sctx, gs, NULL);
-               if (sctx->chip_class <= VI)
+               if (sctx->chip_class <= GFX8)
                         si_pm4_bind_state(sctx, es, NULL);
         }
  
@@ -3480,7 +3518,7 @@ bool si_update_shaders(struct si_context *sctx)
                         sctx->smoothing_enabled = sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing;
                         si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config);
  
-                       if (sctx->chip_class == SI)
+                       if (sctx->chip_class == GFX6)
                                 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
  
                         if (sctx->framebuffer.nr_samples <= 1)
@@ -3498,7 +3536,7 @@ bool si_update_shaders(struct si_context *sctx)
                         return false;
         }
  
-       if (sctx->chip_class >= CIK) {
+       if (sctx->chip_class >= GFX7) {
                 if (si_pm4_state_enabled_and_changed(sctx, ls))
                         sctx->prefetch_L2_mask |= SI_PREFETCH_LS;
                 else if (!sctx->queued.named.ls)