X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_state_shaders.c;h=18015bbec485f74bfffdd202d240d112ce64c808;hb=312e04689a9d8d4f9c319e69c61220e10653cfcd;hp=832e59828949c0da0279d2c95a2c5ffd3fba5722;hpb=896885025f37484cc693f480ee797e83e3a6f9ee;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 832e5982894..18015bbec48 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -27,7 +27,6 @@ #include "compiler/nir/nir_serialize.h" #include "nir/tgsi_to_nir.h" -#include "tgsi/tgsi_parse.h" #include "util/hash_table.h" #include "util/crc32.h" #include "util/u_async_debug.h" @@ -42,24 +41,23 @@ /* SHADER_CACHE */ /** - * Return the IR binary in a buffer. For TGSI the first 4 bytes contain its - * size as integer. + * Return the IR key for the shader cache. */ -void *si_get_ir_binary(struct si_shader_selector *sel, bool ngg, bool es) +void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es, + unsigned char ir_sha1_cache_key[20]) { - struct blob blob; + struct blob blob = {}; unsigned ir_size; void *ir_binary; - if (sel->tokens) { - ir_binary = sel->tokens; - ir_size = tgsi_num_tokens(sel->tokens) * - sizeof(struct tgsi_token); + if (sel->nir_binary) { + ir_binary = sel->nir_binary; + ir_size = sel->nir_size; } else { assert(sel->nir); blob_init(&blob); - nir_serialize(&blob, sel->nir); + nir_serialize(&blob, sel->nir, true); ir_binary = blob.data; ir_size = blob.size; } @@ -78,20 +76,18 @@ void *si_get_ir_binary(struct si_shader_selector *sel, bool ngg, bool es) if (sel->force_correct_derivs_after_kill) shader_variant_flags |= 1 << 3; - unsigned size = 4 + 4 + ir_size + sizeof(sel->so); - char *result = (char*)MALLOC(size); - if (!result) - return NULL; - - ((uint32_t*)result)[0] = size; - ((uint32_t*)result)[1] = shader_variant_flags; - memcpy(result + 8, ir_binary, ir_size); - memcpy(result + 8 + ir_size, &sel->so, sizeof(sel->so)); - - if (sel->nir) + struct mesa_sha1 ctx; + _mesa_sha1_init(&ctx); + _mesa_sha1_update(&ctx, &shader_variant_flags, 4); + _mesa_sha1_update(&ctx, ir_binary, ir_size); + if (sel->type == PIPE_SHADER_VERTEX || + sel->type == PIPE_SHADER_TESS_EVAL || + sel->type == PIPE_SHADER_GEOMETRY) + _mesa_sha1_update(&ctx, &sel->so, sizeof(sel->so)); + _mesa_sha1_final(&ctx, ir_sha1_cache_key); + + if (ir_binary == blob.data) blob_finish(&blob); - - return result; } /** Copy "data" to "ptr" and return the next dword following copied data. */ @@ -208,10 +204,9 @@ static bool si_load_shader_binary(struct si_shader *shader, void *binary) /** * Insert a shader into the cache. It's assumed the shader is not in the cache. * Use si_shader_cache_load_shader before calling this. - * - * Returns false on failure, in which case the ir_binary should be freed. */ -bool si_shader_cache_insert_shader(struct si_screen *sscreen, void *ir_binary, +void si_shader_cache_insert_shader(struct si_screen *sscreen, + unsigned char ir_sha1_cache_key[20], struct si_shader *shader, bool insert_into_disk_cache) { @@ -219,42 +214,41 @@ bool si_shader_cache_insert_shader(struct si_screen *sscreen, void *ir_binary, struct hash_entry *entry; uint8_t key[CACHE_KEY_SIZE]; - entry = _mesa_hash_table_search(sscreen->shader_cache, ir_binary); + entry = _mesa_hash_table_search(sscreen->shader_cache, ir_sha1_cache_key); if (entry) - return false; /* already added */ + return; /* already added */ hw_binary = si_get_shader_binary(shader); if (!hw_binary) - return false; + return; - if (_mesa_hash_table_insert(sscreen->shader_cache, ir_binary, + if (_mesa_hash_table_insert(sscreen->shader_cache, + mem_dup(ir_sha1_cache_key, 20), hw_binary) == NULL) { FREE(hw_binary); - return false; + return; } if (sscreen->disk_shader_cache && insert_into_disk_cache) { - disk_cache_compute_key(sscreen->disk_shader_cache, ir_binary, - *((uint32_t *)ir_binary), key); + disk_cache_compute_key(sscreen->disk_shader_cache, + ir_sha1_cache_key, 20, key); disk_cache_put(sscreen->disk_shader_cache, key, hw_binary, *((uint32_t *) hw_binary), NULL); } - - return true; } -bool si_shader_cache_load_shader(struct si_screen *sscreen, void *ir_binary, +bool si_shader_cache_load_shader(struct si_screen *sscreen, + unsigned char ir_sha1_cache_key[20], struct si_shader *shader) { struct hash_entry *entry = - _mesa_hash_table_search(sscreen->shader_cache, ir_binary); + _mesa_hash_table_search(sscreen->shader_cache, ir_sha1_cache_key); if (!entry) { if (sscreen->disk_shader_cache) { unsigned char sha1[CACHE_KEY_SIZE]; - size_t tg_size = *((uint32_t *) ir_binary); disk_cache_compute_key(sscreen->disk_shader_cache, - ir_binary, tg_size, sha1); + ir_sha1_cache_key, 20, sha1); size_t binary_size; uint8_t *buffer = @@ -285,16 +279,13 @@ bool si_shader_cache_load_shader(struct si_screen *sscreen, void *ir_binary, } free(buffer); - if (!si_shader_cache_insert_shader(sscreen, ir_binary, - shader, false)) - FREE(ir_binary); + si_shader_cache_insert_shader(sscreen, ir_sha1_cache_key, + shader, false); } else { return false; } } else { - if (si_load_shader_binary(shader, entry->data)) - FREE(ir_binary); - else + if (!si_load_shader_binary(shader, entry->data)) return false; } p_atomic_inc(&sscreen->num_shader_cache_hits); @@ -303,20 +294,14 @@ bool si_shader_cache_load_shader(struct si_screen *sscreen, void *ir_binary, static uint32_t si_shader_cache_key_hash(const void *key) { - /* The first dword is the key size. */ - return util_hash_crc32(key, *(uint32_t*)key); + /* Take the first dword of SHA1. */ + return *(uint32_t*)key; } static bool si_shader_cache_key_equals(const void *a, const void *b) { - uint32_t *keya = (uint32_t*)a; - uint32_t *keyb = (uint32_t*)b; - - /* The first dword is the key size. */ - if (*keya != *keyb) - return false; - - return memcmp(keya, keyb, *keya) == 0; + /* Compare SHA1s. */ + return memcmp(a, b, 20) == 0; } static void si_destroy_shader_cache_entry(struct hash_entry *entry) @@ -327,7 +312,7 @@ static void si_destroy_shader_cache_entry(struct hash_entry *entry) bool si_init_shader_cache(struct si_screen *sscreen) { - (void) mtx_init(&sscreen->shader_cache_mutex, mtx_plain); + (void) simple_mtx_init(&sscreen->shader_cache_mutex, mtx_plain); sscreen->shader_cache = _mesa_hash_table_create(NULL, si_shader_cache_key_hash, @@ -341,7 +326,7 @@ void si_destroy_shader_cache(struct si_screen *sscreen) if (sscreen->shader_cache) _mesa_hash_table_destroy(sscreen->shader_cache, si_destroy_shader_cache_entry); - mtx_destroy(&sscreen->shader_cache_mutex); + simple_mtx_destroy(&sscreen->shader_cache_mutex); } /* SHADER STATES */ @@ -1212,7 +1197,8 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader shader->ctx_reg.ngg.vgt_primitiveid_en = S_028A84_PRIMITIVEID_EN(es_enable_prim_id) | - S_028A84_NGG_DISABLE_PROVOK_REUSE(es_enable_prim_id); + S_028A84_NGG_DISABLE_PROVOK_REUSE(shader->key.mono.u.vs_export_prim_id || + gs_sel->info.writes_primid); if (gs_type == PIPE_SHADER_GEOMETRY) { shader->ctx_reg.ngg.vgt_esgs_ring_itemsize = es_sel->esgs_itemsize / 4; @@ -1250,7 +1236,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader shader->ge_cntl = S_03096C_PRIM_GRP_SIZE(shader->ngg.max_gsprims) | - S_03096C_VERT_GRP_SIZE(shader->ngg.hw_max_esverts) | + S_03096C_VERT_GRP_SIZE(256) | /* 256 = disable vertex grouping */ S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi); /* Bug workaround for a possible hang with non-tessellation cases. @@ -1326,9 +1312,6 @@ static void si_emit_shader_vs(struct si_context *sctx) SI_TRACKED_VGT_VERTEX_REUSE_BLOCK_CNTL, shader->vgt_vertex_reuse_block_cntl); - if (initial_cdw != sctx->gfx_cs->current.cdw) - sctx->context_roll = true; - /* Required programming for tessellation. (legacy pipeline only) */ if (sctx->chip_class == GFX10 && shader->selector->type == PIPE_SHADER_TESS_EVAL) { @@ -1345,6 +1328,9 @@ static void si_emit_shader_vs(struct si_context *sctx) shader->pa_cl_vs_out_cntl, SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK); } + + if (initial_cdw != sctx->gfx_cs->current.cdw) + sctx->context_roll = true; } /** @@ -2105,6 +2091,9 @@ static void si_build_shader_variant(struct si_shader *shader, compiler = shader->compiler_ctx_state.compiler; } + if (!compiler->passes) + si_init_compiler(sscreen, compiler); + if (unlikely(!si_shader_create(sscreen, compiler, shader, debug))) { PRINT_ERR("Failed to build shader variant (type=%u)\n", sel->type); @@ -2159,7 +2148,7 @@ static bool si_check_missing_main_part(struct si_screen *sscreen, main_part->key.as_ngg = key->as_ngg; main_part->is_monolithic = false; - if (si_compile_tgsi_shader(sscreen, compiler_state->compiler, + if (si_compile_shader(sscreen, compiler_state->compiler, main_part, &compiler_state->debug) != 0) { FREE(main_part); return false; @@ -2221,14 +2210,14 @@ current_not_ready: if (thread_index < 0) util_queue_fence_wait(&sel->ready); - mtx_lock(&sel->mutex); + simple_mtx_lock(&sel->mutex); /* Find the shader variant. */ for (iter = sel->first_variant; iter; iter = iter->next_variant) { /* Don't check the "current" shader. We checked it above. */ if (current != iter && memcmp(&iter->key, key, sizeof(*key)) == 0) { - mtx_unlock(&sel->mutex); + simple_mtx_unlock(&sel->mutex); if (unlikely(!util_queue_fence_is_signalled(&iter->ready))) { /* If it's an optimized shader and its compilation has @@ -2257,7 +2246,7 @@ current_not_ready: /* Build a new shader. */ shader = CALLOC_STRUCT(si_shader); if (!shader) { - mtx_unlock(&sel->mutex); + simple_mtx_unlock(&sel->mutex); return -ENOMEM; } @@ -2314,11 +2303,11 @@ current_not_ready: assert(0); } - mtx_lock(&previous_stage_sel->mutex); + simple_mtx_lock(&previous_stage_sel->mutex); ok = si_check_missing_main_part(sscreen, previous_stage_sel, compiler_state, &shader1_key); - mtx_unlock(&previous_stage_sel->mutex); + simple_mtx_unlock(&previous_stage_sel->mutex); } if (ok) { @@ -2328,7 +2317,7 @@ current_not_ready: if (!ok) { FREE(shader); - mtx_unlock(&sel->mutex); + simple_mtx_unlock(&sel->mutex); return -ENOMEM; /* skip the draw call */ } } @@ -2373,7 +2362,7 @@ current_not_ready: /* Use the default (unoptimized) shader for now. */ memset(&key->opt, 0, sizeof(key->opt)); - mtx_unlock(&sel->mutex); + simple_mtx_unlock(&sel->mutex); if (sscreen->options.sync_compile) util_queue_fence_wait(&shader->ready); @@ -2394,7 +2383,7 @@ current_not_ready: sel->last_variant = shader; } - mtx_unlock(&sel->mutex); + simple_mtx_unlock(&sel->mutex); assert(!shader->is_optimized); si_build_shader_variant(shader, thread_index, false); @@ -2472,8 +2461,25 @@ static void si_init_shader_selector_async(void *job, int thread_index) assert(thread_index < ARRAY_SIZE(sscreen->compiler)); compiler = &sscreen->compiler[thread_index]; - if (sel->nir) - si_lower_nir(sel); + if (!compiler->passes) + si_init_compiler(sscreen, compiler); + + /* Serialize NIR to save memory. Monolithic shader variants + * have to deserialize NIR before compilation. + */ + if (sel->nir) { + struct blob blob; + size_t size; + + blob_init(&blob); + /* true = remove optional debugging data to increase + * the likehood of getting more shader cache hits. + * It also drops variable names, so we'll save more memory. + */ + nir_serialize(&blob, sel->nir, true); + blob_finish_get_buffer(&blob, &sel->nir_binary, &size); + sel->nir_size = size; + } /* Compile the main shader part for use with a prolog and/or epilog. * If this fails, the driver will try to compile a monolithic shader @@ -2481,7 +2487,7 @@ static void si_init_shader_selector_async(void *job, int thread_index) */ if (!sscreen->use_monolithic_shaders) { struct si_shader *shader = CALLOC_STRUCT(si_shader); - void *ir_binary = NULL; + unsigned char ir_sha1_cache_key[20]; if (!shader) { fprintf(stderr, "radeonsi: can't allocate a main shader part\n"); @@ -2505,36 +2511,32 @@ static void si_init_shader_selector_async(void *job, int thread_index) sel->type == PIPE_SHADER_GEOMETRY)) shader->key.as_ngg = 1; - if (sel->tokens || sel->nir) { - ir_binary = si_get_ir_binary(sel, shader->key.as_ngg, - shader->key.as_es); + if (sel->nir) { + si_get_ir_cache_key(sel, shader->key.as_ngg, + shader->key.as_es, ir_sha1_cache_key); } /* Try to load the shader from the shader cache. */ - mtx_lock(&sscreen->shader_cache_mutex); + simple_mtx_lock(&sscreen->shader_cache_mutex); - if (ir_binary && - si_shader_cache_load_shader(sscreen, ir_binary, shader)) { - mtx_unlock(&sscreen->shader_cache_mutex); + if (si_shader_cache_load_shader(sscreen, ir_sha1_cache_key, shader)) { + simple_mtx_unlock(&sscreen->shader_cache_mutex); si_shader_dump_stats_for_shader_db(sscreen, shader, debug); } else { - mtx_unlock(&sscreen->shader_cache_mutex); + simple_mtx_unlock(&sscreen->shader_cache_mutex); /* Compile the shader if it hasn't been loaded from the cache. */ - if (si_compile_tgsi_shader(sscreen, compiler, shader, + if (si_compile_shader(sscreen, compiler, shader, debug) != 0) { FREE(shader); - FREE(ir_binary); fprintf(stderr, "radeonsi: can't compile a main shader part\n"); return; } - if (ir_binary) { - mtx_lock(&sscreen->shader_cache_mutex); - if (!si_shader_cache_insert_shader(sscreen, ir_binary, shader, true)) - FREE(ir_binary); - mtx_unlock(&sscreen->shader_cache_mutex); - } + simple_mtx_lock(&sscreen->shader_cache_mutex); + si_shader_cache_insert_shader(sscreen, ir_sha1_cache_key, + shader, true); + simple_mtx_unlock(&sscreen->shader_cache_mutex); } *si_get_main_shader_part(sel, &shader->key) = shader; @@ -2595,6 +2597,12 @@ static void si_init_shader_selector_async(void *job, int thread_index) si_shader_vs(sscreen, sel->gs_copy_shader, sel); } + + /* Free NIR. We only keep serialized NIR after this point. */ + if (sel->nir) { + ralloc_free(sel->nir); + sel->nir = NULL; + } } void si_schedule_initial_compile(struct si_context *sctx, unsigned processor, @@ -2633,12 +2641,13 @@ void si_get_active_slot_masks(const struct tgsi_shader_info *info, uint32_t *const_and_shader_buffers, uint64_t *samplers_and_images) { - unsigned start, num_shaderbufs, num_constbufs, num_images, num_samplers; + unsigned start, num_shaderbufs, num_constbufs, num_images, num_msaa_images, num_samplers; num_shaderbufs = util_last_bit(info->shader_buffers_declared); num_constbufs = util_last_bit(info->const_buffers_declared); /* two 8-byte images share one 16-byte slot */ num_images = align(util_last_bit(info->images_declared), 2); + num_msaa_images = align(util_last_bit(info->msaa_images_declared), 2); num_samplers = util_last_bit(info->samplers_declared); /* The layout is: sb[last] ... sb[0], cb[0] ... cb[last] */ @@ -2646,7 +2655,18 @@ void si_get_active_slot_masks(const struct tgsi_shader_info *info, *const_and_shader_buffers = u_bit_consecutive(start, num_shaderbufs + num_constbufs); - /* The layout is: image[last] ... image[0], sampler[0] ... sampler[last] */ + /* The layout is: + * - fmask[last] ... fmask[0] go to [15-last .. 15] + * - image[last] ... image[0] go to [31-last .. 31] + * - sampler[0] ... sampler[last] go to [32 .. 32+last*2] + * + * FMASKs for images are placed separately, because MSAA images are rare, + * and so we can benefit from a better cache hit rate if we keep image + * descriptors together. + */ + if (num_msaa_images) + num_images = SI_NUM_IMAGES + num_msaa_images; /* add FMASK descriptors */ + start = si_get_image_slot(num_images - 1) / 2; *samplers_and_images = u_bit_consecutive64(start, num_images / 2 + num_samplers); @@ -2670,45 +2690,17 @@ static void *si_create_shader_selector(struct pipe_context *ctx, sel->so = state->stream_output; - if (state->type == PIPE_SHADER_IR_TGSI && - !sscreen->options.enable_nir) { - sel->tokens = tgsi_dup_tokens(state->tokens); - if (!sel->tokens) { - FREE(sel); - return NULL; - } - - tgsi_scan_shader(state->tokens, &sel->info); - tgsi_scan_tess_ctrl(state->tokens, &sel->info, &sel->tcs_info); - - /* Fixup for TGSI: Set which opcode uses which (i,j) pair. */ - if (sel->info.uses_persp_opcode_interp_centroid) - sel->info.uses_persp_centroid = true; - - if (sel->info.uses_linear_opcode_interp_centroid) - sel->info.uses_linear_centroid = true; - - if (sel->info.uses_persp_opcode_interp_offset || - sel->info.uses_persp_opcode_interp_sample) - sel->info.uses_persp_center = true; - - if (sel->info.uses_linear_opcode_interp_offset || - sel->info.uses_linear_opcode_interp_sample) - sel->info.uses_linear_center = true; + if (state->type == PIPE_SHADER_IR_TGSI) { + sel->nir = tgsi_to_nir(state->tokens, ctx->screen); } else { - if (state->type == PIPE_SHADER_IR_TGSI) { - sel->nir = tgsi_to_nir(state->tokens, ctx->screen); - } else { - assert(state->type == PIPE_SHADER_IR_NIR); - sel->nir = state->ir.nir; - } - - si_nir_lower_ps_inputs(sel->nir); - si_nir_opts(sel->nir); - si_nir_scan_shader(sel->nir, &sel->info); - si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info); + assert(state->type == PIPE_SHADER_IR_NIR); + sel->nir = state->ir.nir; } + si_nir_scan_shader(sel->nir, &sel->info); + si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info); + si_nir_adjust_driver_locations(sel->nir); + sel->type = sel->info.processor; p_atomic_inc(&sscreen->num_shaders_created); si_get_active_slot_masks(&sel->info, @@ -2722,6 +2714,10 @@ static void *si_create_shader_selector(struct pipe_context *ctx, (sel->so.output[i].stream * 4); } + sel->num_vs_inputs = sel->type == PIPE_SHADER_VERTEX && + !sel->info.properties[TGSI_PROPERTY_VS_BLIT_SGPRS_AMD] ? + sel->info.num_inputs : 0; + /* The prolog is a no-op if there are no inputs. */ sel->vs_needs_prolog = sel->type == PIPE_SHADER_VERTEX && sel->info.num_inputs && @@ -2770,9 +2766,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx, /* EN_MAX_VERT_OUT_PER_GS_INSTANCE does not work with tesselation. */ sel->tess_turns_off_ngg = - (sscreen->info.family == CHIP_NAVI10 || - sscreen->info.family == CHIP_NAVI12 || - sscreen->info.family == CHIP_NAVI14) && + sscreen->info.chip_class == GFX10 && sel->gs_num_invocations * sel->gs_max_out_vertices > 256; break; @@ -2933,7 +2927,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx, if (sel->info.properties[TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE]) sel->db_shader_control |= S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(1); - (void) mtx_init(&sel->mutex, mtx_plain); + (void) simple_mtx_init(&sel->mutex, mtx_plain); si_schedule_initial_compile(sctx, sel->info.processor, &sel->ready, &sel->compiler_ctx_state, sel, @@ -3060,7 +3054,7 @@ bool si_update_ngg(struct si_context *sctx) sctx->flags |= SI_CONTEXT_VGT_FLUSH; sctx->ngg = new_ngg; - sctx->last_rast_prim = -1; /* reset this so that it gets updated */ + sctx->last_gs_out_prim = -1; /* reset this so that it gets updated */ return true; } return false; @@ -3083,7 +3077,7 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state) sctx->ia_multi_vgt_param_key.u.uses_gs = sel != NULL; si_update_common_shader_state(sctx); - sctx->last_rast_prim = -1; /* reset this so that it gets updated */ + sctx->last_gs_out_prim = -1; /* reset this so that it gets updated */ ngg_changed = si_update_ngg(sctx); if (ngg_changed || enable_changed) @@ -3137,7 +3131,7 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state) si_update_tess_uses_prim_id(sctx); si_update_common_shader_state(sctx); - sctx->last_rast_prim = -1; /* reset this so that it gets updated */ + sctx->last_gs_out_prim = -1; /* reset this so that it gets updated */ bool ngg_changed = si_update_ngg(sctx); if (ngg_changed || enable_changed) @@ -3281,9 +3275,9 @@ void si_destroy_shader_selector(struct si_context *sctx, si_delete_shader(sctx, sel->gs_copy_shader); util_queue_fence_destroy(&sel->ready); - mtx_destroy(&sel->mutex); - free(sel->tokens); + simple_mtx_destroy(&sel->mutex); ralloc_free(sel->nir); + free(sel->nir_binary); free(sel); } @@ -3564,18 +3558,18 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx) static void si_shader_lock(struct si_shader *shader) { - mtx_lock(&shader->selector->mutex); + simple_mtx_lock(&shader->selector->mutex); if (shader->previous_stage_sel) { assert(shader->previous_stage_sel != shader->selector); - mtx_lock(&shader->previous_stage_sel->mutex); + simple_mtx_lock(&shader->previous_stage_sel->mutex); } } static void si_shader_unlock(struct si_shader *shader) { if (shader->previous_stage_sel) - mtx_unlock(&shader->previous_stage_sel->mutex); - mtx_unlock(&shader->selector->mutex); + simple_mtx_unlock(&shader->previous_stage_sel->mutex); + simple_mtx_unlock(&shader->selector->mutex); } /** @@ -3850,9 +3844,9 @@ static struct si_pm4_state *si_build_vgt_shader_config(struct si_screen *screen, } if (key.u.ngg) { - stages |= S_028B54_PRIMGEN_EN(1); - if (key.u.streamout) - stages |= S_028B54_NGG_WAVE_ID_EN(1); + stages |= S_028B54_PRIMGEN_EN(1) | + S_028B54_NGG_WAVE_ID_EN(key.u.streamout) | + S_028B54_PRIMGEN_PASSTHRU_EN(key.u.ngg_passthrough); } else if (key.u.gs) stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); @@ -3892,6 +3886,9 @@ bool si_update_shaders(struct si_context *sctx) old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0; int r; + if (!sctx->compiler.passes) + si_init_compiler(sctx->screen, &sctx->compiler); + compiler_state.compiler = &sctx->compiler; compiler_state.debug = sctx->debug; compiler_state.is_debug_context = sctx->is_debug; @@ -4002,6 +3999,10 @@ bool si_update_shaders(struct si_context *sctx) } } + /* This must be done after the shader variant is selected. */ + if (sctx->ngg) + key.u.ngg_passthrough = gfx10_is_ngg_passthrough(si_get_vs(sctx)->current); + si_update_vgt_shader_config(sctx, key); if (old_clip_disable != si_get_vs_state(sctx)->key.opt.clip_disable)