radeonsi: don't allow draw calls with uninitialized VS inputs
[mesa.git] / src / gallium / drivers / radeonsi / si_state_shaders.c
index a6307950400a765d4f64b18b7b995d122908528d..18015bbec485f74bfffdd202d240d112ce64c808 100644 (file)
@@ -27,7 +27,6 @@
 
 #include "compiler/nir/nir_serialize.h"
 #include "nir/tgsi_to_nir.h"
-#include "tgsi/tgsi_parse.h"
 #include "util/hash_table.h"
 #include "util/crc32.h"
 #include "util/u_async_debug.h"
 /* SHADER_CACHE */
 
 /**
- * Return the IR binary in a buffer. For TGSI the first 4 bytes contain its
- * size as integer.
+ * Compute the SHA-1 shader cache key for the IR and store it in ir_sha1_cache_key.
  */
-void *si_get_ir_binary(struct si_shader_selector *sel, bool ngg, bool es)
+void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
+                        unsigned char ir_sha1_cache_key[20])
 {
-       struct blob blob;
+       struct blob blob = {};
        unsigned ir_size;
        void *ir_binary;
 
-       if (sel->tokens) {
-               ir_binary = sel->tokens;
-               ir_size = tgsi_num_tokens(sel->tokens) *
-                                         sizeof(struct tgsi_token);
+       if (sel->nir_binary) {
+               ir_binary = sel->nir_binary;
+               ir_size = sel->nir_size;
        } else {
                assert(sel->nir);
 
@@ -78,20 +76,18 @@ void *si_get_ir_binary(struct si_shader_selector *sel, bool ngg, bool es)
        if (sel->force_correct_derivs_after_kill)
                shader_variant_flags |= 1 << 3;
 
-       unsigned size = 4 + 4 + ir_size + sizeof(sel->so);
-       char *result = (char*)MALLOC(size);
-       if (!result)
-               return NULL;
-
-       ((uint32_t*)result)[0] = size;
-       ((uint32_t*)result)[1] = shader_variant_flags;
-       memcpy(result + 8, ir_binary, ir_size);
-       memcpy(result + 8 + ir_size, &sel->so, sizeof(sel->so));
-
-       if (sel->nir)
+       struct mesa_sha1 ctx;
+       _mesa_sha1_init(&ctx);
+       _mesa_sha1_update(&ctx, &shader_variant_flags, 4);
+       _mesa_sha1_update(&ctx, ir_binary, ir_size);
+       if (sel->type == PIPE_SHADER_VERTEX ||
+           sel->type == PIPE_SHADER_TESS_EVAL ||
+           sel->type == PIPE_SHADER_GEOMETRY)
+               _mesa_sha1_update(&ctx, &sel->so, sizeof(sel->so));
+       _mesa_sha1_final(&ctx, ir_sha1_cache_key);
+
+       if (ir_binary == blob.data)
                blob_finish(&blob);
-
-       return result;
 }
 
 /** Copy "data" to "ptr" and return the next dword following copied data. */
@@ -208,10 +204,9 @@ static bool si_load_shader_binary(struct si_shader *shader, void *binary)
 /**
  * Insert a shader into the cache. It's assumed the shader is not in the cache.
  * Use si_shader_cache_load_shader before calling this.
- *
- * Returns false on failure, in which case the ir_binary should be freed.
  */
-bool si_shader_cache_insert_shader(struct si_screen *sscreen, void *ir_binary,
+void si_shader_cache_insert_shader(struct si_screen *sscreen,
+                                  unsigned char ir_sha1_cache_key[20],
                                   struct si_shader *shader,
                                   bool insert_into_disk_cache)
 {
@@ -219,42 +214,41 @@ bool si_shader_cache_insert_shader(struct si_screen *sscreen, void *ir_binary,
        struct hash_entry *entry;
        uint8_t key[CACHE_KEY_SIZE];
 
-       entry = _mesa_hash_table_search(sscreen->shader_cache, ir_binary);
+       entry = _mesa_hash_table_search(sscreen->shader_cache, ir_sha1_cache_key);
        if (entry)
-               return false; /* already added */
+               return; /* already added */
 
        hw_binary = si_get_shader_binary(shader);
        if (!hw_binary)
-               return false;
+               return;
 
-       if (_mesa_hash_table_insert(sscreen->shader_cache, ir_binary,
+       if (_mesa_hash_table_insert(sscreen->shader_cache,
+                                   mem_dup(ir_sha1_cache_key, 20),
                                    hw_binary) == NULL) {
                FREE(hw_binary);
-               return false;
+               return;
        }
 
        if (sscreen->disk_shader_cache && insert_into_disk_cache) {
-               disk_cache_compute_key(sscreen->disk_shader_cache, ir_binary,
-                                      *((uint32_t *)ir_binary), key);
+               disk_cache_compute_key(sscreen->disk_shader_cache,
+                                      ir_sha1_cache_key, 20, key);
                disk_cache_put(sscreen->disk_shader_cache, key, hw_binary,
                               *((uint32_t *) hw_binary), NULL);
        }
-
-       return true;
 }
 
-bool si_shader_cache_load_shader(struct si_screen *sscreen, void *ir_binary,
+bool si_shader_cache_load_shader(struct si_screen *sscreen,
+                                unsigned char ir_sha1_cache_key[20],
                                 struct si_shader *shader)
 {
        struct hash_entry *entry =
-               _mesa_hash_table_search(sscreen->shader_cache, ir_binary);
+               _mesa_hash_table_search(sscreen->shader_cache, ir_sha1_cache_key);
        if (!entry) {
                if (sscreen->disk_shader_cache) {
                        unsigned char sha1[CACHE_KEY_SIZE];
-                       size_t tg_size = *((uint32_t *) ir_binary);
 
                        disk_cache_compute_key(sscreen->disk_shader_cache,
-                                              ir_binary, tg_size, sha1);
+                                              ir_sha1_cache_key, 20, sha1);
 
                        size_t binary_size;
                        uint8_t *buffer =
@@ -285,16 +279,13 @@ bool si_shader_cache_load_shader(struct si_screen *sscreen, void *ir_binary,
                        }
                        free(buffer);
 
-                       if (!si_shader_cache_insert_shader(sscreen, ir_binary,
-                                                          shader, false))
-                               FREE(ir_binary);
+                       si_shader_cache_insert_shader(sscreen, ir_sha1_cache_key,
+                                                     shader, false);
                } else {
                        return false;
                }
        } else {
-               if (si_load_shader_binary(shader, entry->data))
-                       FREE(ir_binary);
-               else
+               if (!si_load_shader_binary(shader, entry->data))
                        return false;
        }
        p_atomic_inc(&sscreen->num_shader_cache_hits);
@@ -303,20 +294,14 @@ bool si_shader_cache_load_shader(struct si_screen *sscreen, void *ir_binary,
 
 static uint32_t si_shader_cache_key_hash(const void *key)
 {
-       /* The first dword is the key size. */
-       return util_hash_crc32(key, *(uint32_t*)key);
+       /* Take the first dword of SHA1. */
+       return *(uint32_t*)key;
 }
 
 static bool si_shader_cache_key_equals(const void *a, const void *b)
 {
-       uint32_t *keya = (uint32_t*)a;
-       uint32_t *keyb = (uint32_t*)b;
-
-       /* The first dword is the key size. */
-       if (*keya != *keyb)
-               return false;
-
-       return memcmp(keya, keyb, *keya) == 0;
+       /* Compare SHA1s. */
+       return memcmp(a, b, 20) == 0;
 }
 
 static void si_destroy_shader_cache_entry(struct hash_entry *entry)
@@ -1212,7 +1197,8 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
 
        shader->ctx_reg.ngg.vgt_primitiveid_en =
                S_028A84_PRIMITIVEID_EN(es_enable_prim_id) |
-               S_028A84_NGG_DISABLE_PROVOK_REUSE(es_enable_prim_id);
+               S_028A84_NGG_DISABLE_PROVOK_REUSE(shader->key.mono.u.vs_export_prim_id ||
+                                                 gs_sel->info.writes_primid);
 
        if (gs_type == PIPE_SHADER_GEOMETRY) {
                shader->ctx_reg.ngg.vgt_esgs_ring_itemsize = es_sel->esgs_itemsize / 4;
@@ -1250,7 +1236,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
 
        shader->ge_cntl =
                S_03096C_PRIM_GRP_SIZE(shader->ngg.max_gsprims) |
-               S_03096C_VERT_GRP_SIZE(shader->ngg.hw_max_esverts) |
+               S_03096C_VERT_GRP_SIZE(256) | /* 256 = disable vertex grouping */
                S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi);
 
        /* Bug workaround for a possible hang with non-tessellation cases.
@@ -1326,9 +1312,6 @@ static void si_emit_shader_vs(struct si_context *sctx)
                                           SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL,
                                           shader->vgt_vertex_reuse_block_cntl);
 
-       if (initial_cdw != sctx->gfx_cs->current.cdw)
-               sctx->context_roll = true;
-
        /* Required programming for tessellation. (legacy pipeline only) */
        if (sctx->chip_class == GFX10 &&
            shader->selector->type == PIPE_SHADER_TESS_EVAL) {
@@ -1345,6 +1328,9 @@ static void si_emit_shader_vs(struct si_context *sctx)
                                               shader->pa_cl_vs_out_cntl,
                                               SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK);
        }
+
+       if (initial_cdw != sctx->gfx_cs->current.cdw)
+               sctx->context_roll = true;
 }
 
 /**
@@ -2162,7 +2148,7 @@ static bool si_check_missing_main_part(struct si_screen *sscreen,
                main_part->key.as_ngg = key->as_ngg;
                main_part->is_monolithic = false;
 
-               if (si_compile_tgsi_shader(sscreen, compiler_state->compiler,
+               if (si_compile_shader(sscreen, compiler_state->compiler,
                                           main_part, &compiler_state->debug) != 0) {
                        FREE(main_part);
                        return false;
@@ -2478,13 +2464,30 @@ static void si_init_shader_selector_async(void *job, int thread_index)
        if (!compiler->passes)
                si_init_compiler(sscreen, compiler);
 
+       /* Serialize NIR to save memory. Monolithic shader variants
+        * have to deserialize NIR before compilation.
+        */
+       if (sel->nir) {
+               struct blob blob;
+                size_t size;
+
+               blob_init(&blob);
+               /* true = remove optional debugging data to increase
+                * the likelihood of getting more shader cache hits.
+                * It also drops variable names, so we'll save more memory.
+                */
+               nir_serialize(&blob, sel->nir, true);
+               blob_finish_get_buffer(&blob, &sel->nir_binary, &size);
+               sel->nir_size = size;
+       }
+
        /* Compile the main shader part for use with a prolog and/or epilog.
         * If this fails, the driver will try to compile a monolithic shader
         * on demand.
         */
        if (!sscreen->use_monolithic_shaders) {
                struct si_shader *shader = CALLOC_STRUCT(si_shader);
-               void *ir_binary = NULL;
+               unsigned char ir_sha1_cache_key[20];
 
                if (!shader) {
                        fprintf(stderr, "radeonsi: can't allocate a main shader part\n");
@@ -2508,36 +2511,32 @@ static void si_init_shader_selector_async(void *job, int thread_index)
                     sel->type == PIPE_SHADER_GEOMETRY))
                        shader->key.as_ngg = 1;
 
-               if (sel->tokens || sel->nir) {
-                       ir_binary = si_get_ir_binary(sel, shader->key.as_ngg,
-                                                    shader->key.as_es);
+               if (sel->nir) {
+                       si_get_ir_cache_key(sel, shader->key.as_ngg,
+                                           shader->key.as_es, ir_sha1_cache_key);
                }
 
                /* Try to load the shader from the shader cache. */
                simple_mtx_lock(&sscreen->shader_cache_mutex);
 
-               if (ir_binary &&
-                   si_shader_cache_load_shader(sscreen, ir_binary, shader)) {
+               if (si_shader_cache_load_shader(sscreen, ir_sha1_cache_key, shader)) {
                        simple_mtx_unlock(&sscreen->shader_cache_mutex);
                        si_shader_dump_stats_for_shader_db(sscreen, shader, debug);
                } else {
                        simple_mtx_unlock(&sscreen->shader_cache_mutex);
 
                        /* Compile the shader if it hasn't been loaded from the cache. */
-                       if (si_compile_tgsi_shader(sscreen, compiler, shader,
+                       if (si_compile_shader(sscreen, compiler, shader,
                                                   debug) != 0) {
                                FREE(shader);
-                               FREE(ir_binary);
                                fprintf(stderr, "radeonsi: can't compile a main shader part\n");
                                return;
                        }
 
-                       if (ir_binary) {
-                               simple_mtx_lock(&sscreen->shader_cache_mutex);
-                               if (!si_shader_cache_insert_shader(sscreen, ir_binary, shader, true))
-                                       FREE(ir_binary);
-                               simple_mtx_unlock(&sscreen->shader_cache_mutex);
-                       }
+                       simple_mtx_lock(&sscreen->shader_cache_mutex);
+                       si_shader_cache_insert_shader(sscreen, ir_sha1_cache_key,
+                                                     shader, true);
+                       simple_mtx_unlock(&sscreen->shader_cache_mutex);
                }
 
                *si_get_main_shader_part(sel, &shader->key) = shader;
@@ -2598,6 +2597,12 @@ static void si_init_shader_selector_async(void *job, int thread_index)
 
                si_shader_vs(sscreen, sel->gs_copy_shader, sel);
        }
+
+       /* Free NIR. We only keep serialized NIR after this point. */
+       if (sel->nir) {
+               ralloc_free(sel->nir);
+               sel->nir = NULL;
+       }
 }
 
 void si_schedule_initial_compile(struct si_context *sctx, unsigned processor,
@@ -2685,44 +2690,17 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 
        sel->so = state->stream_output;
 
-       if (state->type == PIPE_SHADER_IR_TGSI &&
-           !sscreen->options.enable_nir) {
-               sel->tokens = tgsi_dup_tokens(state->tokens);
-               if (!sel->tokens) {
-                       FREE(sel);
-                       return NULL;
-               }
-
-               tgsi_scan_shader(state->tokens, &sel->info);
-               tgsi_scan_tess_ctrl(state->tokens, &sel->info, &sel->tcs_info);
-
-               /* Fixup for TGSI: Set which opcode uses which (i,j) pair. */
-               if (sel->info.uses_persp_opcode_interp_centroid)
-                       sel->info.uses_persp_centroid = true;
-
-               if (sel->info.uses_linear_opcode_interp_centroid)
-                       sel->info.uses_linear_centroid = true;
-
-               if (sel->info.uses_persp_opcode_interp_offset ||
-                   sel->info.uses_persp_opcode_interp_sample)
-                       sel->info.uses_persp_center = true;
-
-               if (sel->info.uses_linear_opcode_interp_offset ||
-                   sel->info.uses_linear_opcode_interp_sample)
-                       sel->info.uses_linear_center = true;
+       if (state->type == PIPE_SHADER_IR_TGSI) {
+               sel->nir = tgsi_to_nir(state->tokens, ctx->screen);
        } else {
-               if (state->type == PIPE_SHADER_IR_TGSI) {
-                       sel->nir = tgsi_to_nir(state->tokens, ctx->screen);
-               } else {
-                       assert(state->type == PIPE_SHADER_IR_NIR);
-                       sel->nir = state->ir.nir;
-               }
-
-               si_nir_scan_shader(sel->nir, &sel->info);
-               si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info);
-               si_nir_adjust_driver_locations(sel->nir);
+               assert(state->type == PIPE_SHADER_IR_NIR);
+               sel->nir = state->ir.nir;
        }
 
+       si_nir_scan_shader(sel->nir, &sel->info);
+       si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info);
+       si_nir_adjust_driver_locations(sel->nir);
+
        sel->type = sel->info.processor;
        p_atomic_inc(&sscreen->num_shaders_created);
        si_get_active_slot_masks(&sel->info,
@@ -2736,6 +2714,10 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
                        (sel->so.output[i].stream * 4);
        }
 
+       sel->num_vs_inputs = sel->type == PIPE_SHADER_VERTEX &&
+                            !sel->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD] ?
+                                    sel->info.num_inputs : 0;
+
        /* The prolog is a no-op if there are no inputs. */
        sel->vs_needs_prolog = sel->type == PIPE_SHADER_VERTEX &&
                               sel->info.num_inputs &&
@@ -2784,9 +2766,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 
                /* EN_MAX_VERT_OUT_PER_GS_INSTANCE does not work with tesselation. */
                sel->tess_turns_off_ngg =
-                       (sscreen->info.family == CHIP_NAVI10 ||
-                        sscreen->info.family == CHIP_NAVI12 ||
-                        sscreen->info.family == CHIP_NAVI14) &&
+                       sscreen->info.chip_class == GFX10 &&
                        sel->gs_num_invocations * sel->gs_max_out_vertices > 256;
                break;
 
@@ -3074,7 +3054,7 @@ bool si_update_ngg(struct si_context *sctx)
                        sctx->flags |= SI_CONTEXT_VGT_FLUSH;
 
                sctx->ngg = new_ngg;
-               sctx->last_rast_prim = -1; /* reset this so that it gets updated */
+               sctx->last_gs_out_prim = -1; /* reset this so that it gets updated */
                return true;
        }
        return false;
@@ -3097,7 +3077,7 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
        sctx->ia_multi_vgt_param_key.u.uses_gs = sel != NULL;
 
        si_update_common_shader_state(sctx);
-       sctx->last_rast_prim = -1; /* reset this so that it gets updated */
+       sctx->last_gs_out_prim = -1; /* reset this so that it gets updated */
 
        ngg_changed = si_update_ngg(sctx);
        if (ngg_changed || enable_changed)
@@ -3151,7 +3131,7 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
        si_update_tess_uses_prim_id(sctx);
 
        si_update_common_shader_state(sctx);
-       sctx->last_rast_prim = -1; /* reset this so that it gets updated */
+       sctx->last_gs_out_prim = -1; /* reset this so that it gets updated */
 
        bool ngg_changed = si_update_ngg(sctx);
        if (ngg_changed || enable_changed)
@@ -3296,8 +3276,8 @@ void si_destroy_shader_selector(struct si_context *sctx,
 
        util_queue_fence_destroy(&sel->ready);
        simple_mtx_destroy(&sel->mutex);
-       free(sel->tokens);
        ralloc_free(sel->nir);
+       free(sel->nir_binary);
        free(sel);
 }
 
@@ -3864,9 +3844,9 @@ static struct si_pm4_state *si_build_vgt_shader_config(struct si_screen *screen,
        }
 
        if (key.u.ngg) {
-               stages |= S_028B54_PRIMGEN_EN(1);
-               if (key.u.streamout)
-                       stages |= S_028B54_NGG_WAVE_ID_EN(1);
+               stages |= S_028B54_PRIMGEN_EN(1) |
+                         S_028B54_NGG_WAVE_ID_EN(key.u.streamout) |
+                         S_028B54_PRIMGEN_PASSTHRU_EN(key.u.ngg_passthrough);
        } else if (key.u.gs)
                stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
 
@@ -3906,6 +3886,9 @@ bool si_update_shaders(struct si_context *sctx)
                old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0;
        int r;
 
+       if (!sctx->compiler.passes)
+               si_init_compiler(sctx->screen, &sctx->compiler);
+
        compiler_state.compiler = &sctx->compiler;
        compiler_state.debug = sctx->debug;
        compiler_state.is_debug_context = sctx->is_debug;
@@ -4016,6 +3999,10 @@ bool si_update_shaders(struct si_context *sctx)
                }
        }
 
+       /* This must be done after the shader variant is selected. */
+       if (sctx->ngg)
+               key.u.ngg_passthrough = gfx10_is_ngg_passthrough(si_get_vs(sctx)->current);
+
        si_update_vgt_shader_config(sctx, key);
 
        if (old_clip_disable != si_get_vs_state(sctx)->key.opt.clip_disable)